001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerDameng; 009import gudusoft.gsqlparser.TParserDameng; 010import gudusoft.gsqlparser.TSourceToken; 011import gudusoft.gsqlparser.TSourceTokenList; 012import gudusoft.gsqlparser.TStatementList; 013import gudusoft.gsqlparser.TSyntaxError; 014import gudusoft.gsqlparser.EFindSqlStateType; 015import gudusoft.gsqlparser.ESqlPlusCmd; 016import gudusoft.gsqlparser.ETokenType; 017import gudusoft.gsqlparser.ETokenStatus; 018import gudusoft.gsqlparser.ESqlStatementType; 019import gudusoft.gsqlparser.EErrorType; 020import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement; 021import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 022import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 023import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 024import gudusoft.gsqlparser.compiler.TContext; 025import gudusoft.gsqlparser.sqlenv.TSQLEnv; 026import gudusoft.gsqlparser.compiler.TGlobalScope; 027import gudusoft.gsqlparser.compiler.TFrame; 028import gudusoft.gsqlparser.resolver.TSQLResolver; 029import gudusoft.gsqlparser.TLog; 030import gudusoft.gsqlparser.TGSqlParser; 031import gudusoft.gsqlparser.compiler.TASTEvaluator; 032import java.util.Stack; 033 034import java.io.BufferedReader; 035import java.util.ArrayList; 036import java.util.List; 037 038/** 039 * Dameng database SQL parser implementation. 
 *
 * <p>This parser handles Dameng-specific SQL syntax including:
 * <ul>
 *   <li>PL/SQL blocks (procedures, functions, packages, triggers)</li>
 *   <li>SQL*Plus commands (spool, set, show, etc.)</li>
 *   <li>Dameng-specific DML/DDL (MERGE, flashback, etc.)</li>
 *   <li>Dameng analytical functions and extensions</li>
 *   <li>Special token handling (INNER, NOT DEFERRABLE, etc.)</li>
 * </ul>
 *
 * <p><b>Implementation Status:</b> PHASE 3 - IN PROGRESS
 * <ul>
 *   <li><b>Completed:</b> Dameng classes (TLexerDameng, TParserDameng) are now PUBLIC</li>
 *   <li><b>Current:</b> Skeleton implementation delegates to legacy TGSqlParser</li>
 *   <li><b>Next:</b> Extract vendor-specific logic from TGSqlParser into this class</li>
 *   <li><b>Goal:</b> Fully self-contained Dameng parser using AbstractSqlParser template</li>
 * </ul>
 *
 * <p><b>Design Notes:</b>
 * <ul>
 *   <li>Extends {@link AbstractSqlParser} and implements its vendor-specific hook methods</li>
 *   <li>Can now directly instantiate: {@link TLexerDameng}, {@link TParserDameng}</li>
 *   <li>Uses two {@link TParserDameng} instances: one for SQL statements and one for PL/SQL blocks</li>
 *   <li>Handles SQL*Plus commands via special tokenization logic</li>
 *   <li>Delimiter character: '/' for PL/SQL blocks, ';' for SQL statements</li>
 * </ul>
 *
 * <p><b>Usage Example:</b>
 * <pre>
 * // Get Dameng parser from factory
 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvdameng);
 *
 * // Build context
 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvdameng)
 *     .sqlText("SELECT * FROM emp WHERE deptno = 10")
 *     .build();
 *
 * // Parse
 * SqlParseResult result = parser.parse(context);
 *
 * // Access statements
 * TStatementList statements = result.getSqlStatements();
 * </pre>
 *
 * <p><b>Phase 3 Extraction Roadmap:</b>
 * <ol>
 *   <li>✅ DONE:
Make TLexerDameng, TParserDameng, TParserDameng public</li> 087 * <li>⏳ TODO: Extract tokenization logic (~367 lines from TGSqlParser.dodamengsqltexttotokenlist())</li> 088 * <li>⏳ TODO: Extract raw statement logic (~200 lines from TGSqlParser.dodamenggetrawsqlstatements())</li> 089 * <li>⏳ TODO: Extract parsing orchestration (SQL vs PL/SQL parser selection)</li> 090 * <li>⏳ TODO: Extract helper methods (getanewsourcetoken, getprevsolidtoken, etc.)</li> 091 * <li>⏳ TODO: Extend AbstractSqlParser and use template method pattern fully</li> 092 * <li>⏳ TODO: Remove all delegation to TGSqlParser</li> 093 * </ol> 094 * 095 * <p><b>Key Methods to Extract from TGSqlParser:</b> 096 * <ul> 097 * <li>{@code dodamengsqltexttotokenlist()} - Dameng tokenization with SQL*Plus command detection</li> 098 * <li>{@code dodamenggetrawsqlstatements()} - Dameng raw statement boundaries (handles PL/SQL blocks)</li> 099 * <li>{@code getanewsourcetoken()} - Token iterator from lexer</li> 100 * <li>{@code getprevsolidtoken()} - Navigate token list backwards</li> 101 * <li>{@code IsValidPlaceForDivToSqlplusCmd()} - Slash vs divide operator disambiguation</li> 102 * <li>{@code countLines()} - Multi-line token handling</li> 103 * <li>{@code spaceAtTheEndOfReturnToken()} - SQL*Plus command validation</li> 104 * </ul> 105 * 106 * @see SqlParser 107 * @see AbstractSqlParser 108 * @see TLexerDameng 109 * @see TParserDameng 110 * @see TParserDameng 111 * @since 3.2.0.0 112 */ 113public class DamengSqlParser extends AbstractSqlParser { 114 115 /** 116 * Construct Dameng SQL parser. 117 * <p> 118 * Configures the parser for Dameng database with default delimiters: 119 * <ul> 120 * <li>SQL statements: semicolon (;)</li> 121 * <li>PL/SQL blocks: forward slash (/)</li> 122 * </ul> 123 * <p> 124 * Following the original TGSqlParser pattern, the lexer and parsers are 125 * created once in the constructor and reused for all parsing operations. 
126 * This avoids unnecessary object allocation overhead since the parser 127 * is not thread-safe and designed for single-use per instance. 128 */ 129 public DamengSqlParser() { 130 super(EDbVendor.dbvdameng); 131 this.delimiterChar = '/'; // PL/SQL delimiter 132 this.defaultDelimiterStr = ";"; // SQL delimiter 133 134 // Create lexer once - will be reused for all parsing operations 135 // (matches original TGSqlParser constructor pattern at line 1033) 136 this.flexer = new TLexerDameng(); 137 this.flexer.delimiterchar = this.delimiterChar; 138 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 139 140 // Set parent's lexer reference for shared tokenization logic 141 this.lexer = this.flexer; 142 143 // Create parsers once - will be reused for all parsing operations 144 // Token list will be set/updated when parsing begins 145 // (matches original TGSqlParser constructor pattern at lines 1036-1040) 146 this.fparser = new TParserDameng(null); 147 this.fplsqlparser = new TParserDameng(null); 148 this.fparser.lexer = this.flexer; 149 this.fplsqlparser.lexer = this.flexer; 150 151 // NOTE: sourcetokenlist and sqlstatements are initialized in AbstractSqlParser constructor 152 } 153 154 // ========== Tokenization State (used during tokenization) ========== 155 // These instance variables are used during the tokenization process 156 // and are set up at the beginning of tokenization 157 158 /** The Dameng lexer used for tokenization */ 159 public TLexerDameng flexer; // Package-accessible for TGSqlParser integration 160 161 // NOTE: sourcetokenlist moved to AbstractSqlParser (inherited) 162 163 /** Optional callback for token processing (can be null) */ 164 private Object tokenHandle; // TTokenCallback interface - keeping as Object for now 165 166 // State variables for tokenization (set during dodamengsqltexttotokenlist()) 167 private boolean continuesqlplusatnewline; 168 private boolean waitingreturnforsemicolon; 169 private boolean waitingreturnforfloatdiv; 
170 private boolean isvalidplace; 171 private boolean insqlpluscmd; 172 173 // ========== Statement Parsing State (used during statement parsing) ========== 174 // These instance variables are used during the statement parsing process 175 176 // NOTE: The following fields moved to AbstractSqlParser (inherited): 177 // - sqlcmds (ISqlCmds) 178 // - sqlstatements (TStatementList) 179 // - parserContext (ParserContext) 180 181 /** Current statement being built */ 182 private TCustomSqlStatement gcurrentsqlstatement; 183 184 /** SQL parser (for regular SQL statements) */ 185 private TParserDameng fparser; 186 187 /** PL/SQL parser (for PL/SQL blocks) */ 188 private TParserDameng fplsqlparser; 189 190 // Note: Global context and frame stack fields inherited from AbstractSqlParser: 191 // - protected TContext globalContext 192 // - protected TSQLEnv sqlEnv 193 // - protected Stack<TFrame> frameStack 194 // - protected TFrame globalFrame 195 196 // ========== Enums for State Machine ========== 197 // These enums are used by the dodamenggetrawsqlstatements state machine 198 199 enum stored_procedure_status {start,is_as,body,bodyend,end, cursor_declare}; 200 enum stored_procedure_type {function,procedure,package_spec,package_body, block_with_begin,block_with_declare, 201 create_trigger,create_library,cursor_in_package_spec,others}; 202 203 static final int stored_procedure_nested_level = 1024; 204 205 // ========== AbstractSqlParser Abstract Methods Implementation ========== 206 207 /** 208 * Return the Dameng lexer instance. 209 * <p> 210 * The lexer is created once in the constructor and reused for all 211 * parsing operations. This method simply returns the existing instance, 212 * matching the original TGSqlParser pattern where the lexer is created 213 * once and reset before each use. 
214 * 215 * @param context parser context (not used, lexer already created) 216 * @return the Dameng lexer instance created in constructor 217 */ 218 @Override 219 protected TCustomLexer getLexer(ParserContext context) { 220 // Return existing lexer instance (created in constructor) 221 // No need to create new instance - matches original TGSqlParser pattern 222 return this.flexer; 223 } 224 225 /** 226 * Return the Dameng SQL parser instance with updated token list. 227 * <p> 228 * The parser is created once in the constructor and reused for all 229 * parsing operations. This method updates the token list and returns 230 * the existing instance, matching the original TGSqlParser pattern. 231 * 232 * @param context parser context (not used, parser already created) 233 * @param tokens source token list to parse 234 * @return the Dameng SQL parser instance created in constructor 235 */ 236 @Override 237 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 238 // Update token list for reused parser instance 239 this.fparser.sourcetokenlist = tokens; 240 return this.fparser; 241 } 242 243 /** 244 * Return the Dameng PL/SQL parser instance with updated token list. 245 * <p> 246 * Dameng needs a secondary parser (TParserDameng) for PL/SQL blocks 247 * (procedures, functions, packages, triggers, anonymous blocks). 248 * <p> 249 * The parser is created once in the constructor and reused for all 250 * parsing operations. This method updates the token list and returns 251 * the existing instance, matching the original TGSqlParser pattern. 
252 * 253 * @param context parser context (not used, parser already created) 254 * @param tokens source token list to parse 255 * @return the Dameng PL/SQL parser instance created in constructor 256 */ 257 @Override 258 protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) { 259 // Update token list for reused parser instance 260 this.fplsqlparser.sourcetokenlist = tokens; 261 return this.fplsqlparser; 262 } 263 264 /** 265 * Call Dameng-specific tokenization logic. 266 * <p> 267 * Delegates to dodamengsqltexttotokenlist which handles Dameng's 268 * specific keyword recognition, SQL*Plus commands, forward slash 269 * disambiguation, and token generation. 270 */ 271 @Override 272 protected void tokenizeVendorSql() { 273 dodamengsqltexttotokenlist(); 274 } 275 276 /** 277 * Post-tokenization: merge ${...} template variable tokens into single IDENT tokens. 278 * Template syntax like ${if(len(X) == 0, "", "...")} is used by BI tools. 279 */ 280 @Override 281 protected void doAfterTokenize(TSourceTokenList tokens) { 282 super.doAfterTokenize(tokens); 283 mergeTemplateVariableTokens(tokens); 284 } 285 286 private void mergeTemplateVariableTokens(TSourceTokenList tokens) { 287 for (int i = 0; i < tokens.size() - 1; i++) { 288 TSourceToken dollar = tokens.get(i); 289 290 // Match either bare '$' (self-char) or '$IDENT' like $P, $X (identifier starting with $) 291 boolean isDollarChar = (dollar.tokencode == '$'); 292 boolean isDollarIdent = (dollar.tokencode == TBaseType.ident 293 && dollar.astext != null && dollar.astext.startsWith("$")); 294 if (!isDollarChar && !isDollarIdent) continue; 295 296 // Find next non-whitespace token — for $IDENT pattern, require immediate '{' (no whitespace) 297 int braceIdx = i + 1; 298 if (isDollarChar) { 299 while (braceIdx < tokens.size() && tokens.get(braceIdx).tokentype == ETokenType.ttwhitespace) { 300 braceIdx++; 301 } 302 } 303 if (braceIdx >= tokens.size() || tokens.get(braceIdx).tokencode != 
'{') continue; 304 305 // Found ${ pattern — find matching } with depth tracking 306 int depth = 1; 307 int endIdx = braceIdx + 1; 308 boolean isComplex = false; 309 while (endIdx < tokens.size() && depth > 0) { 310 int code = tokens.get(endIdx).tokencode; 311 if (code == '{') depth++; 312 else if (code == '}') depth--; 313 else if (code == '(' || code == ',' || code == '\'' || code == '"') isComplex = true; 314 if (depth > 0) endIdx++; 315 } 316 if (depth != 0) continue; // unclosed 317 318 // Build merged token text 319 StringBuilder sb = new StringBuilder(); 320 for (int j = i; j <= endIdx; j++) { 321 sb.append(tokens.get(j).astext); 322 } 323 324 if (isComplex && isDollarChar) { 325 // Complex template starting with bare $ like ${if(len(X)==0,...)} 326 // These expand to SQL fragments (e.g., AND clauses) at runtime, 327 // so convert to whitespace to let parser skip them entirely. 328 for (int j = i; j <= endIdx; j++) { 329 tokens.get(j).tokentype = ETokenType.ttwhitespace; 330 tokens.get(j).tokencode = TBaseType.lexspace; 331 } 332 } else { 333 // Simple template like ${NAME}, or JasperReports $P{VAR}/$X{IN,COL,PARAM} 334 // These expand to single values/expressions, so merge into IDENT placeholder. 335 dollar.astext = sb.toString(); 336 dollar.tokencode = TBaseType.ident; 337 dollar.tokentype = ETokenType.ttidentifier; 338 // Convert remaining tokens to whitespace so parser skips them even if 339 // tokenstatus is overwritten by statement splitter (tsignoredbygetrawstatement) 340 for (int j = i + 1; j <= endIdx; j++) { 341 tokens.get(j).tokentype = ETokenType.ttwhitespace; 342 tokens.get(j).tokencode = TBaseType.lexspace; 343 } 344 } 345 346 i = endIdx; // skip past merged tokens 347 } 348 } 349 350 /** 351 * Setup Dameng parsers for raw statement extraction. 352 * <p> 353 * Dameng uses dual parsers (SQL + PL/SQL), so we inject sqlcmds and 354 * update token lists for both parsers. 
355 */ 356 @Override 357 protected void setupVendorParsersForExtraction() { 358 // Inject sqlcmds into BOTH parsers (SQL + PL/SQL) 359 this.fparser.sqlcmds = this.sqlcmds; 360 this.fplsqlparser.sqlcmds = this.sqlcmds; 361 362 // Update token list for BOTH parsers 363 this.fparser.sourcetokenlist = this.sourcetokenlist; 364 this.fplsqlparser.sourcetokenlist = this.sourcetokenlist; 365 } 366 367 /** 368 * Call Dameng-specific raw statement extraction logic. 369 * <p> 370 * Delegates to dodamenggetrawsqlstatements which handles Dameng's 371 * statement delimiters (semicolon and forward slash). 372 */ 373 @Override 374 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 375 dodamenggetrawsqlstatements(builder); 376 } 377 378 /** 379 * Perform full parsing of statements with syntax checking. 380 * <p> 381 * This method orchestrates the parsing of all statements by: 382 * <ul> 383 * <li>Using the raw statements passed from AbstractSqlParser.parse()</li> 384 * <li>Initializing SQL and PL/SQL parsers</li> 385 * <li>Creating global context and frame stack</li> 386 * <li>Looping through each raw statement</li> 387 * <li>Calling parsestatement() on each to build AST</li> 388 * <li>Handling error recovery for CREATE TABLE/INDEX</li> 389 * <li>Collecting syntax errors</li> 390 * </ul> 391 * 392 * <p><b>Important:</b> This method does NOT extract raw statements - they are 393 * passed in as a parameter already extracted by {@link #extractRawStatements}. 394 * This eliminates duplicate extraction that was occurring in the old design. 
395 * 396 * <p>Extracted from: TGSqlParser.doparse() lines 16903-17026 397 * 398 * @param context parser context 399 * @param parser main SQL parser (TParserDameng) 400 * @param secondaryParser PL/SQL parser (TParserDameng) 401 * @param tokens source token list 402 * @param rawStatements raw statements already extracted (never null) 403 * @return list of fully parsed statements with AST built 404 */ 405 @Override 406 protected TStatementList performParsing(ParserContext context, 407 TCustomParser parser, 408 TCustomParser secondaryParser, 409 TSourceTokenList tokens, 410 TStatementList rawStatements) { 411 // Store references 412 this.fparser = (TParserDameng) parser; 413 this.fplsqlparser = (TParserDameng) secondaryParser; 414 this.sourcetokenlist = tokens; 415 this.parserContext = context; 416 417 // Use the raw statements passed from AbstractSqlParser.parse() 418 // (already extracted - DO NOT re-extract to avoid duplication) 419 this.sqlstatements = rawStatements; 420 421 // Initialize statement parsing infrastructure 422 this.sqlcmds = SqlCmdsFactory.get(vendor); 423 424 // Inject sqlcmds into parsers (required for make_stmt and other methods) 425 this.fparser.sqlcmds = this.sqlcmds; 426 this.fplsqlparser.sqlcmds = this.sqlcmds; 427 428 // Initialize global context for semantic analysis 429 // CRITICAL: When delegated from TGSqlParser, use TGSqlParser's frameStack 430 // so that variables set in statements can be found by other statements 431 if (context != null && context.getGsqlparser() != null) { 432 TGSqlParser gsqlparser = (TGSqlParser) context.getGsqlparser(); 433 this.frameStack = gsqlparser.getFrameStack(); 434 435 // CRITICAL: Set gsqlparser on the NodeFactory - matches TGSqlParser behavior 436 // This is needed for proper AST node creation during parsing 437 // Without this, expression traversal order may differ, causing 438 // dataflow constant ordering issues 439 this.fparser.getNf().setGsqlParser(gsqlparser); 440 
this.fplsqlparser.getNf().setGsqlParser(gsqlparser); 441 442 // Create global context if needed 443 this.globalContext = new TContext(); 444 this.sqlEnv = new TSQLEnv(this.vendor) { 445 @Override 446 public void initSQLEnv() { 447 } 448 }; 449 this.globalContext.setSqlEnv(this.sqlEnv, this.sqlstatements); 450 } else { 451 initializeGlobalContext(); 452 } 453 454 // Parse each statement with exception handling for robustness 455 for (int i = 0; i < sqlstatements.size(); i++) { 456 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 457 458 try { 459 stmt.setFrameStack(frameStack); 460 461 // Parse the statement 462 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 463 464 // Dameng-specific post-processing (overridden hook method) 465 afterStatementParsed(stmt); 466 467 // Handle error recovery for CREATE TABLE/INDEX 468 boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE; 469 if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) { 470 handleCreateTableErrorRecovery(stmt); 471 } 472 473 // Collect syntax errors 474 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 475 copyErrorsFromStatement(stmt); 476 } 477 478 } catch (Exception ex) { 479 // Use inherited exception handler from AbstractSqlParser 480 // This provides consistent error handling across all database parsers 481 handleStatementParsingException(stmt, i, ex); 482 continue; 483 } 484 } 485 486 // Clean up frame stack 487 if (globalFrame != null) { 488 globalFrame.popMeFromStack(frameStack); 489 } 490 491 return this.sqlstatements; 492 } 493 494 // Note: initializeGlobalContext() inherited from AbstractSqlParser 495 496 /** 497 * Override to provide Dameng-specific post-processing after statement parsing. 498 * <p> 499 * For Dameng, we check if the statement is PL/SQL and recursively find syntax 500 * errors in nested PL/SQL statements. 
501 */ 502 @Override 503 protected void afterStatementParsed(TCustomSqlStatement stmt) { 504 if (stmt.isoracleplsql()) { 505 findAllSyntaxErrorsInPlsql(stmt); 506 } 507 } 508 509 /** 510 * Perform Dameng-specific semantic analysis using TSQLResolver. 511 * 512 * <p>This includes: 513 * <ul> 514 * <li>Column-to-table resolution</li> 515 * <li>Dataflow analysis</li> 516 * <li>Reference resolution</li> 517 * <li>Scope resolution</li> 518 * </ul> 519 * 520 * @param context the parser context 521 * @param statements the parsed statements 522 */ 523 @Override 524 protected void performSemanticAnalysis(ParserContext context, TStatementList statements) { 525 if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) { 526 TSQLResolver resolver = new TSQLResolver(globalContext, statements); 527 resolver.resolve(); 528 } 529 } 530 531 /** 532 * Perform Dameng-specific AST interpretation/evaluation using TASTEvaluator. 533 * 534 * <p>This executes simple SQL statements and evaluates expressions 535 * for static analysis and constant folding. 536 * 537 * @param context the parser context 538 * @param statements the parsed statements 539 */ 540 @Override 541 protected void performInterpreter(ParserContext context, TStatementList statements) { 542 if (TBaseType.ENABLE_INTERPRETER && getSyntaxErrors().isEmpty()) { 543 TLog.clearLogs(); 544 TGlobalScope interpreterScope = new TGlobalScope(sqlEnv); 545 TLog.enableInterpreterLogOnly(); 546 TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope); 547 astEvaluator.eval(); 548 } 549 } 550 551 // ========== Raw Statement Extraction ========== 552 // These methods extract raw SQL statements from tokens without full parsing 553 // Extracted from TGSqlParser.dodamenggetrawsqlstatements() and related methods 554 555 /** 556 * Extract raw Dameng SQL statements from tokenized source. 
     * <p>
     * This is the main Dameng statement extraction state machine that:
     * <ul>
     *   <li>Groups tokens into statement boundaries</li>
     *   <li>Identifies statement types (SQL vs PL/SQL, SQL*Plus commands)</li>
     *   <li>Handles nested PL/SQL blocks (procedures, functions, packages, triggers)</li>
     *   <li>Tracks BEGIN/END pairs and other block delimiters</li>
     *   <li>Detects statement terminators (semicolon, forward slash, period)</li>
     * </ul>
     *
     * <p><b>State Machine:</b> Uses the following five states:
     * <ul>
     *   <li>{@code stnormal} - Between statements, looking for start of next statement</li>
     *   <li>{@code stsql} - Inside a SQL statement</li>
     *   <li>{@code stsqlplus} - Inside a SQL*Plus command</li>
     *   <li>{@code ststoredprocedure} - Inside a PL/SQL block (procedure/function/package/trigger)</li>
     *   <li>{@code sterror} - Error recovery mode</li>
     * </ul>
     *
     * <p><b>Extracted from:</b> TGSqlParser.dodamenggetrawsqlstatements() (lines 10071-10859)
     *
     * <p><b>Design Note:</b> This method now receives a builder to populate with results,
     * following Option A design where the vendor-specific method focuses on parsing logic
     * while extractRawStatements() handles result construction.
581 * 582 * @param builder the result builder to populate with statements and error information 583 */ 584 private void dodamenggetrawsqlstatements(SqlParseResult.Builder builder) { 585 int waitingEnds[] = new int[stored_procedure_nested_level]; 586 stored_procedure_type sptype[] = new stored_procedure_type[stored_procedure_nested_level]; 587 stored_procedure_status procedure_status[] = new stored_procedure_status[stored_procedure_nested_level]; 588 boolean endBySlashOnly = true; 589 int nestedProcedures = 0, nestedParenthesis = 0; 590 // Flag for CREATE MLE MODULE with AS clause - terminates with / not ; 591 boolean mleModuleWithAs = false; 592 // Flag for WITH FUNCTION/PROCEDURE - track BEGIN/END nesting to handle embedded semicolons 593 boolean withPlsqlDefinition = false; 594 int withPlsqlBeginEndNesting = 0; 595 boolean withPlsqlFoundSelect = false; // True when SELECT has been found after WITH FUNCTION 596 // Track whether the current CTE statement's main SELECT has been found 597 // (i.e., the SELECT after WITH name AS (...) 
at paren level 0) 598 boolean cteMainSelectFound = false; 599 600 if (TBaseType.assigned(sqlstatements)) sqlstatements.clear(); 601 if (!TBaseType.assigned(sourcetokenlist)) { 602 // No tokens available - populate builder with error and return 603 builder.errorCode(1); 604 builder.errorMessage("No source token list available"); 605 builder.sqlStatements(new TStatementList()); 606 return; 607 } 608 609 gcurrentsqlstatement = null; 610 EFindSqlStateType gst = EFindSqlStateType.stnormal; 611 TSourceToken lcprevsolidtoken = null, ast = null; 612 613 // Main tokenization loop 614 for (int i = 0; i < sourcetokenlist.size(); i++) { 615 616 if ((ast != null) && (ast.issolidtoken())) 617 lcprevsolidtoken = ast; 618 619 ast = sourcetokenlist.get(i); 620 sourcetokenlist.curpos = i; 621 622 // Token-specific keyword transformations for Dameng 623 performRawStatementTokenTransformations(ast); 624 625 // State machine processing 626 switch (gst) { 627 case sterror: { 628 if (ast.tokentype == ETokenType.ttsemicolon) { 629 appendToken(gcurrentsqlstatement, ast); 630 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 631 gst = EFindSqlStateType.stnormal; 632 } else { 633 appendToken(gcurrentsqlstatement, ast); 634 } 635 break; 636 } //sterror 637 638 case stnormal: { 639 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 640 || (ast.tokencode == TBaseType.cmtslashstar) 641 || (ast.tokencode == TBaseType.lexspace) 642 || (ast.tokencode == TBaseType.lexnewline) 643 || (ast.tokentype == ETokenType.ttsemicolon)) { 644 if (gcurrentsqlstatement != null) { 645 appendToken(gcurrentsqlstatement, ast); 646 } 647 648 if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) { 649 if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) { 650 // ;;;; continuous semicolon, treat it as comment 651 ast.tokentype = ETokenType.ttsimplecomment; 652 ast.tokencode = TBaseType.cmtdoublehyphen; 653 } 
654 } 655 656 continue; 657 } 658 659 if (ast.tokencode == TBaseType.sqlpluscmd) { 660 gst = EFindSqlStateType.stsqlplus; 661 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 662 appendToken(gcurrentsqlstatement, ast); 663 continue; 664 } 665 666 // find a token to start sql or plsql mode 667 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 668 669 if (gcurrentsqlstatement != null) { 670 if (gcurrentsqlstatement.isoracleplsql()) { 671 nestedProcedures = 0; 672 gst = EFindSqlStateType.ststoredprocedure; 673 appendToken(gcurrentsqlstatement, ast); 674 675 switch (gcurrentsqlstatement.sqlstatementtype) { 676 case sstplsql_createprocedure: 677 sptype[nestedProcedures] = stored_procedure_type.procedure; 678 break; 679 case sstplsql_createfunction: 680 sptype[nestedProcedures] = stored_procedure_type.function; 681 break; 682 case sstplsql_createpackage: 683 sptype[nestedProcedures] = stored_procedure_type.package_spec; 684 if (ast.searchToken(TBaseType.rrw_body, 5) != null) { 685 sptype[nestedProcedures] = stored_procedure_type.package_body; 686 } 687 break; 688 case sst_plsql_block: 689 sptype[nestedProcedures] = stored_procedure_type.block_with_declare; 690 if (ast.tokencode == TBaseType.rrw_begin) { 691 sptype[nestedProcedures] = stored_procedure_type.block_with_begin; 692 } 693 break; 694 case sstplsql_createtrigger: 695 sptype[nestedProcedures] = stored_procedure_type.create_trigger; 696 break; 697 case sstoraclecreatelibrary: 698 sptype[nestedProcedures] = stored_procedure_type.create_library; 699 break; 700 case sstplsql_createtype_placeholder: 701 gst = EFindSqlStateType.stsql; 702 break; 703 case sstplsql_createtypebody: 704 sptype[nestedProcedures] = stored_procedure_type.others; 705 break; 706 default: 707 sptype[nestedProcedures] = stored_procedure_type.others; 708 break; 709 } 710 711 if (sptype[0] == stored_procedure_type.block_with_declare) { 712 endBySlashOnly = false; 713 procedure_status[0] = stored_procedure_status.is_as; 
714 } else if (sptype[0] == stored_procedure_type.block_with_begin) { 715 endBySlashOnly = false; 716 procedure_status[0] = stored_procedure_status.body; 717 } else if (sptype[0] == stored_procedure_type.procedure) { 718 endBySlashOnly = false; 719 procedure_status[0] = stored_procedure_status.start; 720 } else if (sptype[0] == stored_procedure_type.function) { 721 endBySlashOnly = false; 722 procedure_status[0] = stored_procedure_status.start; 723 } else if (sptype[0] == stored_procedure_type.package_spec) { 724 endBySlashOnly = false; 725 procedure_status[0] = stored_procedure_status.start; 726 } else if (sptype[0] == stored_procedure_type.package_body) { 727 endBySlashOnly = false; 728 procedure_status[0] = stored_procedure_status.start; 729 } else if (sptype[0] == stored_procedure_type.create_trigger) { 730 endBySlashOnly = false; 731 procedure_status[0] = stored_procedure_status.start; 732 } else if (sptype[0] == stored_procedure_type.create_library) { 733 endBySlashOnly = false; 734 procedure_status[0] = stored_procedure_status.bodyend; 735 } else { 736 endBySlashOnly = true; 737 procedure_status[0] = stored_procedure_status.bodyend; 738 } 739 740 if ((ast.tokencode == TBaseType.rrw_begin) 741 || (ast.tokencode == TBaseType.rrw_package) 742 || (ast.searchToken(TBaseType.rrw_package, 4) != null)) { 743 waitingEnds[nestedProcedures] = 1; 744 } 745 } else { 746 gst = EFindSqlStateType.stsql; 747 appendToken(gcurrentsqlstatement, ast); 748 nestedParenthesis = 0; 749 // Check if this is CREATE MLE MODULE with AS clause (JavaScript code) 750 // If AS is found after LANGUAGE JAVASCRIPT, it terminates with / not ; 751 if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstoraclecreatemlemodule) { 752 // Look ahead to see if there's an AS keyword 753 TSourceToken asToken = ast.searchToken(TBaseType.rrw_as, 10); 754 mleModuleWithAs = (asToken != null); 755 } else { 756 mleModuleWithAs = false; 757 } 758 759 // Check if this is WITH FUNCTION/PROCEDURE (Dameng 
inline PL/SQL) 760 // Need to track BEGIN/END nesting to handle embedded semicolons 761 if (ast.tokencode == TBaseType.rrw_with && gcurrentsqlstatement.isctequery) { 762 // Look ahead for FUNCTION or PROCEDURE keyword 763 TSourceToken nextSolid = ast.nextSolidToken(); 764 if (nextSolid != null && (nextSolid.tokencode == TBaseType.rrw_function 765 || nextSolid.tokencode == TBaseType.rrw_procedure)) { 766 withPlsqlDefinition = true; 767 withPlsqlBeginEndNesting = 0; 768 } 769 } 770 } 771 } else { 772 //error token found 773 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo) 774 , "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 775 776 ast.tokentype = ETokenType.tttokenlizererrortoken; 777 gst = EFindSqlStateType.sterror; 778 779 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 780 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 781 appendToken(gcurrentsqlstatement, ast); 782 } 783 784 break; 785 } // stnormal 786 787 case stsqlplus: { 788 if (ast.insqlpluscmd) { 789 appendToken(gcurrentsqlstatement, ast); 790 } else { 791 gst = EFindSqlStateType.stnormal; //this token must be newline, 792 appendToken(gcurrentsqlstatement, ast); // so add it here 793 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 794 } 795 796 break; 797 }//case stsqlplus 798 799 case stsql: { 800 // For WITH FUNCTION/PROCEDURE, track BEGIN/END nesting and when SELECT is found 801 if (withPlsqlDefinition) { 802 if (ast.tokencode == TBaseType.rrw_begin) { 803 withPlsqlBeginEndNesting++; 804 } else if (ast.tokencode == TBaseType.rrw_end) { 805 withPlsqlBeginEndNesting--; 806 if (withPlsqlBeginEndNesting < 0) withPlsqlBeginEndNesting = 0; 807 } else if (ast.tokencode == TBaseType.rrw_select && withPlsqlBeginEndNesting == 0) { 808 // Found SELECT after all function 
definitions are done 809 withPlsqlFoundSelect = true; 810 } 811 } 812 813 // For CREATE MLE MODULE with AS clause, don't terminate on semicolon 814 // The JavaScript code may contain semicolons; wait for / to terminate 815 // For WITH FUNCTION/PROCEDURE, don't terminate on semicolon until SELECT is found 816 // (the semicolons in function body and after END are part of the function definition) 817 boolean skipSemicolonTermination = mleModuleWithAs || (withPlsqlDefinition && !withPlsqlFoundSelect); 818 if (ast.tokentype == ETokenType.ttsemicolon && !skipSemicolonTermination) { 819 gst = EFindSqlStateType.stnormal; 820 appendToken(gcurrentsqlstatement, ast); 821 gcurrentsqlstatement.semicolonended = ast; 822 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 823 mleModuleWithAs = false; // Reset flag 824 withPlsqlDefinition = false; // Reset WITH FUNCTION flag 825 withPlsqlBeginEndNesting = 0; 826 cteMainSelectFound = false; 827 withPlsqlFoundSelect = false; 828 continue; 829 } 830 831 if (sourcetokenlist.sqlplusaftercurtoken()) //most probably is / cmd 832 { 833 gst = EFindSqlStateType.stnormal; 834 appendToken(gcurrentsqlstatement, ast); 835 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 836 mleModuleWithAs = false; // Reset flag 837 continue; 838 } 839 840 if (ast.tokencode == '(') nestedParenthesis++; 841 if (ast.tokencode == ')') { 842 nestedParenthesis--; 843 if (nestedParenthesis < 0) nestedParenthesis = 0; 844 } 845 846 Boolean findNewStmt = false; 847 TCustomSqlStatement lcStmt = null; 848 // Check for new statement: CREATE TABLE (original), or SELECT inside a non-CTE SELECT 849 boolean shouldCheckNewStmt = false; 850 if ((nestedParenthesis == 0) && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstcreatetable)) { 851 shouldCheckNewStmt = true; 852 } else if 
((nestedParenthesis == 0) && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstselect) 853 && (ast.tokencode == TBaseType.rrw_select || ast.tokencode == TBaseType.rrw_with)) { 854 // Check if current statement is a CTE (starts with WITH) 855 boolean isCteContext = false; 856 for (int si = 0; si < gcurrentsqlstatement.sourcetokenlist.size(); si++) { 857 TSourceToken st = gcurrentsqlstatement.sourcetokenlist.get(si); 858 if (st.tokentype == ETokenType.ttwhitespace || st.tokentype == ETokenType.ttreturn 859 || st.tokencode == TBaseType.cmtdoublehyphen || st.tokencode == TBaseType.cmtslashstar) { 860 continue; 861 } 862 if (st.tokencode == TBaseType.rrw_with) { 863 isCteContext = true; 864 } 865 break; 866 } 867 // Don't split if previous token makes this SELECT part of current statement: 868 // - Set operators: UNION, INTERSECT, MINUS, EXCEPT, ALL 869 // - Left paren: (SELECT ...) — SELECT is main query of parenthesized expr 870 boolean suppressSplit = false; 871 if (ast.tokencode == TBaseType.rrw_select && lcprevsolidtoken != null) { 872 int prevCode = lcprevsolidtoken.tokencode; 873 if (prevCode == TBaseType.rrw_union || prevCode == TBaseType.rrw_intersect 874 || prevCode == TBaseType.rrw_minus || prevCode == TBaseType.rrw_except 875 || prevCode == TBaseType.rrw_all 876 || prevCode == '(') { 877 suppressSplit = true; 878 } 879 } 880 if (suppressSplit) { 881 // SELECT is part of current statement — don't split 882 } else if (!isCteContext) { 883 // Non-CTE SELECT: any SELECT/WITH at paren level 0 starts a new statement 884 shouldCheckNewStmt = true; 885 } else if (cteMainSelectFound) { 886 // CTE context: main SELECT already consumed, so this SELECT/WITH 887 // at paren level 0 is a new statement 888 shouldCheckNewStmt = true; 889 } else if (ast.tokencode == TBaseType.rrw_select) { 890 // CTE context: this is the main SELECT after WITH name AS (...) 
891 cteMainSelectFound = true; 892 // Don't split — this SELECT is part of the CTE statement 893 } 894 // If ast is WITH and main SELECT not yet found, it could be another 895 // CTE definition (WITH a AS (...), b AS (...)) — don't split 896 } 897 if (shouldCheckNewStmt) { 898 // For SELECT-after-SELECT/WITH splitting, use stnormal so issql can detect CTE starts. 899 // For CREATE TABLE, preserve original stsql state to avoid false positives 900 // (e.g., INSERT/DELETE keywords in blockchain table clauses). 901 EFindSqlStateType issqlState = (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstselect) 902 ? EFindSqlStateType.stnormal : gst; 903 lcStmt = sqlcmds.issql(ast, issqlState, gcurrentsqlstatement); 904 if (lcStmt != null) { 905 findNewStmt = true; 906 if (lcStmt.sqlstatementtype == ESqlStatementType.sstselect) { 907 TSourceToken prevst = ast.prevSolidToken(); 908 if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstcreatetable) { 909 // For CREATE TABLE, suppress split when SELECT follows AS/(/): AS (SELECT ...) 
910 if ((prevst.tokencode == TBaseType.rrw_as) || (prevst.tokencode == '(') || (prevst.tokencode == ')')) { 911 findNewStmt = false; 912 } 913 } 914 // For SELECT-after-SELECT/WITH splitting at paren level 0, 915 // no suppression needed — the new SELECT/WITH is a new statement 916 } 917 } 918 } 919 920 if (findNewStmt) { 921 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 922 gcurrentsqlstatement = lcStmt; 923 cteMainSelectFound = false; // Reset for new statement 924 nestedParenthesis = 0; // Reset paren tracking for new statement 925 appendToken(gcurrentsqlstatement, ast); 926 continue; 927 } else 928 appendToken(gcurrentsqlstatement, ast); 929 930 break; 931 }//case stsql 932 933 case ststoredprocedure: { 934 935 if (procedure_status[nestedProcedures] != stored_procedure_status.bodyend) { 936 appendToken(gcurrentsqlstatement, ast); 937 } 938 939 switch (procedure_status[nestedProcedures]) { 940 case cursor_declare: 941 if (ast.tokencode == ';') { 942 nestedProcedures--; 943 if (nestedProcedures < 0) { 944 nestedProcedures = 0; 945 } 946 } 947 break; 948 case start: 949 if ((ast.tokencode == TBaseType.rrw_as) || (ast.tokencode == TBaseType.rrw_is)) { 950 if (sptype[nestedProcedures] != stored_procedure_type.create_trigger) { 951 if ((sptype[0] == stored_procedure_type.package_spec) && (nestedProcedures > 0)) { 952 // when it's a package specification, only top level accept as/is 953 } else { 954 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 955 if (ast.searchToken("language", 1) != null) { 956 if (nestedProcedures == 0) { 957 gst = EFindSqlStateType.stsql; 958 } else { 959 procedure_status[nestedProcedures] = stored_procedure_status.body; 960 nestedProcedures--; 961 } 962 } 963 } 964 } 965 } else if (ast.tokencode == TBaseType.rrw_begin) { 966 if (sptype[nestedProcedures] == stored_procedure_type.create_trigger) { 967 waitingEnds[nestedProcedures]++; 968 
} 969 if (nestedProcedures > 0) { 970 nestedProcedures--; 971 } 972 procedure_status[nestedProcedures] = stored_procedure_status.body; 973 } else if (ast.tokencode == TBaseType.rrw_end) { 974 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures - 1] == 1) 975 && ((sptype[nestedProcedures - 1] == stored_procedure_type.package_body) 976 || (sptype[nestedProcedures - 1] == stored_procedure_type.package_spec))) { 977 nestedProcedures--; 978 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 979 } 980 } else if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) { 981 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures] == 0) 982 && (procedure_status[nestedProcedures - 1] == stored_procedure_status.is_as)) { 983 nestedProcedures--; 984 nestedProcedures++; 985 waitingEnds[nestedProcedures] = 0; 986 procedure_status[nestedProcedures] = stored_procedure_status.start; 987 } 988 } else if (ast.tokencode == TBaseType.rrw_oracle_cursor) { 989 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures] == 0) 990 && (procedure_status[nestedProcedures - 1] == stored_procedure_status.is_as)) { 991 nestedProcedures--; 992 nestedProcedures++; 993 waitingEnds[nestedProcedures] = 0; 994 procedure_status[nestedProcedures] = stored_procedure_status.cursor_declare; 995 } 996 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) && (ast.tokencode == TBaseType.rrw_declare)) { 997 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 998 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) 999 && (ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 1000 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 1001 gst = EFindSqlStateType.stnormal; 1002 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1003 1004 gcurrentsqlstatement = new 
TSqlplusCmdStatement(vendor); 1005 appendToken(gcurrentsqlstatement, ast); 1006 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1007 } else if (sptype[nestedProcedures] == stored_procedure_type.create_trigger) { 1008 if (ast.tokencode == TBaseType.rrw_trigger) { 1009 TSourceToken compoundSt = ast.searchToken(TBaseType.rrw_oracle_compound, -1); 1010 if (compoundSt != null) { 1011 procedure_status[nestedProcedures] = stored_procedure_status.body; 1012 waitingEnds[nestedProcedures]++; 1013 } 1014 } 1015 } else if ((sptype[nestedProcedures] == stored_procedure_type.function) 1016 && (ast.tokencode == TBaseType.rrw_teradata_using)) { 1017 if ((ast.searchToken("aggregate", -1) != null) || (ast.searchToken("pipelined", -1) != null)) { 1018 if (nestedProcedures == 0) { 1019 gst = EFindSqlStateType.stsql; 1020 } else { 1021 procedure_status[nestedProcedures] = stored_procedure_status.body; 1022 nestedProcedures--; 1023 } 1024 } 1025 } 1026 break; 1027 case is_as: 1028 if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) { 1029 nestedProcedures++; 1030 if (nestedProcedures > stored_procedure_nested_level - 1) { 1031 gst = EFindSqlStateType.sterror; 1032 nestedProcedures--; 1033 } else { 1034 waitingEnds[nestedProcedures] = 0; 1035 procedure_status[nestedProcedures] = stored_procedure_status.start; 1036 } 1037 } else if (ast.tokencode == TBaseType.rrw_begin) { 1038 if ((nestedProcedures == 0) && 1039 ((sptype[nestedProcedures] == stored_procedure_type.package_body) 1040 || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) { 1041 // top level package begin already counted 1042 } else { 1043 waitingEnds[nestedProcedures]++; 1044 } 1045 procedure_status[nestedProcedures] = stored_procedure_status.body; 1046 } else if (ast.tokencode == TBaseType.rrw_end) { 1047 if ((nestedProcedures == 0) && (waitingEnds[nestedProcedures] == 1) 1048 && 
((sptype[nestedProcedures] == stored_procedure_type.package_body) 1049 || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) { 1050 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 1051 waitingEnds[nestedProcedures]--; 1052 } else { 1053 waitingEnds[nestedProcedures]--; 1054 } 1055 } else if (ast.tokencode == TBaseType.rrw_case) { 1056 if (ast.searchToken(';', 1) == null) { 1057 waitingEnds[nestedProcedures]++; 1058 } 1059 } 1060 break; 1061 case body: 1062 if (ast.tokencode == TBaseType.rrw_begin) { 1063 waitingEnds[nestedProcedures]++; 1064 } else if (ast.tokencode == TBaseType.rrw_if) { 1065 if (ast.searchToken(';', 2) == null) { 1066 waitingEnds[nestedProcedures]++; 1067 } 1068 } else if (ast.tokencode == TBaseType.rrw_case) { 1069 if (ast.searchToken(';', 2) == null) { 1070 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 1071 waitingEnds[nestedProcedures]++; 1072 } 1073 } 1074 } else if (ast.tokencode == TBaseType.rrw_loop) { 1075 if (!((ast.searchToken(TBaseType.rrw_end, -1) != null) 1076 && (ast.searchToken(';', 2) != null))) { 1077 waitingEnds[nestedProcedures]++; 1078 } 1079 } else if (ast.tokencode == TBaseType.rrw_end) { 1080 waitingEnds[nestedProcedures]--; 1081 if (waitingEnds[nestedProcedures] == 0) { 1082 if (nestedProcedures == 0) { 1083 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 1084 } else { 1085 nestedProcedures--; 1086 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 1087 } 1088 } 1089 } else if ((waitingEnds[nestedProcedures] == 0) 1090 && (ast.tokentype == ETokenType.ttslash) 1091 && (ast.tokencode == TBaseType.sqlpluscmd)) { 1092 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 1093 gst = EFindSqlStateType.stnormal; 1094 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1095 1096 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 1097 appendToken(gcurrentsqlstatement, 
ast); 1098 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1099 } 1100 break; 1101 case bodyend: 1102 if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 1103 // TPlsqlStatementParse(asqlstatement).TerminatorToken := ast; 1104 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 1105 gst = EFindSqlStateType.stnormal; 1106 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1107 1108 //make / a sqlplus cmd 1109 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 1110 appendToken(gcurrentsqlstatement, ast); 1111 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1112 } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) { 1113 // single dot at a seperate line 1114 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 1115 gst = EFindSqlStateType.stnormal; 1116 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1117 1118 //make ttperiod a sqlplus cmd 1119 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 1120 appendToken(gcurrentsqlstatement, ast); 1121 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1122 } else if ((ast.searchToken(TBaseType.rrw_package, 1) != null) && (!endBySlashOnly)) { 1123 appendToken(gcurrentsqlstatement, ast); 1124 gst = EFindSqlStateType.stnormal; 1125 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1126 } else if ((ast.searchToken(TBaseType.rrw_procedure, 1) != null) && (!endBySlashOnly)) { 1127 
appendToken(gcurrentsqlstatement, ast); 1128 gst = EFindSqlStateType.stnormal; 1129 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1130 } else if ((ast.searchToken(TBaseType.rrw_function, 1) != null) && (!endBySlashOnly)) { 1131 appendToken(gcurrentsqlstatement, ast); 1132 gst = EFindSqlStateType.stnormal; 1133 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1134 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) 1135 && ((ast.searchToken(TBaseType.rrw_package, 4) != null) || (ast.searchToken(TBaseType.rrw_package, 5) != null)) 1136 && (!endBySlashOnly)) { 1137 appendToken(gcurrentsqlstatement, ast); 1138 gst = EFindSqlStateType.stnormal; 1139 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1140 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) 1141 && ((ast.searchToken(TBaseType.rrw_procedure, 4) != null) 1142 || (ast.searchToken(TBaseType.rrw_function, 4) != null) 1143 || (ast.searchToken(TBaseType.rrw_view, 4) != null) 1144 || (ast.searchToken(TBaseType.rrw_oracle_synonym, 4) != null) 1145 || (ast.searchToken(TBaseType.rrw_trigger, 4) != null)) 1146 && (!endBySlashOnly)) { 1147 appendToken(gcurrentsqlstatement, ast); 1148 gst = EFindSqlStateType.stnormal; 1149 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1150 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) && (ast.searchToken(TBaseType.rrw_library, 4) != null) && (!endBySlashOnly)) { 1151 appendToken(gcurrentsqlstatement, ast); 1152 gst = EFindSqlStateType.stnormal; 1153 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1154 } else if 
((ast.searchToken(TBaseType.rrw_alter, 1) != null) && (ast.searchToken(TBaseType.rrw_trigger, 2) != null) && (!endBySlashOnly)) { 1155 appendToken(gcurrentsqlstatement, ast); 1156 gst = EFindSqlStateType.stnormal; 1157 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1158 } else if ((ast.searchToken(TBaseType.rrw_select, 1) != null) && (!endBySlashOnly)) { 1159 appendToken(gcurrentsqlstatement, ast); 1160 gst = EFindSqlStateType.stnormal; 1161 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1162 } else if ((ast.searchToken(TBaseType.rrw_call, 1) != null) && (!endBySlashOnly)) { 1163 appendToken(gcurrentsqlstatement, ast); 1164 gst = EFindSqlStateType.stnormal; 1165 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1166 } else if ((ast.searchToken(TBaseType.rrw_commit, 1) != null) && (!endBySlashOnly)) { 1167 appendToken(gcurrentsqlstatement, ast); 1168 gst = EFindSqlStateType.stnormal; 1169 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1170 } else if ((ast.searchToken(TBaseType.rrw_declare, 1) != null) && (!endBySlashOnly)) { 1171 appendToken(gcurrentsqlstatement, ast); 1172 gst = EFindSqlStateType.stnormal; 1173 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1174 } else if ((ast.searchToken(TBaseType.rrw_grant, 1) != null) 1175 && (ast.searchToken(TBaseType.rrw_execute, 2) != null) && (!endBySlashOnly)) { 1176 appendToken(gcurrentsqlstatement, ast); 1177 gst = EFindSqlStateType.stnormal; 1178 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1179 } else 
if ((ast.searchToken(TBaseType.rrw_alter, 1) != null) 1180 && (ast.searchToken(TBaseType.rrw_table, 2) != null) && (!endBySlashOnly)) { 1181 appendToken(gcurrentsqlstatement, ast); 1182 gst = EFindSqlStateType.stnormal; 1183 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1184 } else { 1185 appendToken(gcurrentsqlstatement, ast); 1186 } 1187 break; 1188 case end: 1189 break; 1190 default: 1191 break; 1192 } 1193 1194 if (ast.tokencode == TBaseType.sqlpluscmd) { 1195 int m = flexer.getkeywordvalue(ast.getAstext()); 1196 if (m != 0) { 1197 ast.tokencode = m; 1198 } else if (ast.tokentype == ETokenType.ttslash) { 1199 ast.tokencode = '/'; 1200 } else { 1201 ast.tokencode = TBaseType.ident; 1202 } 1203 } 1204 1205 final int wrapped_keyword_max_pos = 20; 1206 if ((ast.tokencode == TBaseType.rrw_wrapped) 1207 && (ast.posinlist - gcurrentsqlstatement.sourcetokenlist.get(0).posinlist < wrapped_keyword_max_pos)) { 1208 if (gcurrentsqlstatement instanceof gudusoft.gsqlparser.stmt.TCommonStoredProcedureSqlStatement) { 1209 ((gudusoft.gsqlparser.stmt.TCommonStoredProcedureSqlStatement) gcurrentsqlstatement).setWrapped(true); 1210 } 1211 1212 if (gcurrentsqlstatement instanceof gudusoft.gsqlparser.stmt.oracle.TPlsqlCreatePackage) { 1213 if (ast.prevSolidToken() != null) { 1214 ((gudusoft.gsqlparser.stmt.oracle.TPlsqlCreatePackage) gcurrentsqlstatement) 1215 .setPackageName(fparser.getNf().createObjectNameWithPart(ast.prevSolidToken())); 1216 } 1217 } 1218 } 1219 1220 break; 1221 } //ststoredprocedure 1222 1223 } //switch 1224 }//for 1225 1226 //last statement 1227 if ((gcurrentsqlstatement != null) && 1228 ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) || 1229 (gst == EFindSqlStateType.sterror))) { 1230 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, 
true, builder); 1231 } 1232 1233 // Populate builder with results 1234 builder.sqlStatements(this.sqlstatements); 1235 builder.syntaxErrors(syntaxErrors instanceof ArrayList ? 1236 (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors)); 1237 builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size()); 1238 builder.errorMessage(syntaxErrors.isEmpty() ? "" : 1239 String.format("Raw extraction completed with %d error(s)", syntaxErrors.size())); 1240 } 1241 1242 /** 1243 * Handle token transformations during raw statement extraction. 1244 * <p> 1245 * This performs Dameng-specific keyword disambiguation that must happen 1246 * before statement boundary detection. Examples: 1247 * <ul> 1248 * <li>RETURN after WHERE → treat as identifier</li> 1249 * <li>VALUE after BY → mark as value_after_by</li> 1250 * <li>NEW → treat as identifier or constructor based on context</li> 1251 * <li>And many more Dameng-specific cases</li> 1252 * </ul> 1253 * 1254 * @param ast current token being processed 1255 */ 1256 private void performRawStatementTokenTransformations(TSourceToken ast) { 1257 // This method contains the keyword transformation logic from dodamenggetrawsqlstatements 1258 // It's been extracted to keep the main method more readable 1259 1260 if (ast.tokencode == TBaseType.rrw_return) { 1261 TSourceToken stMatch = ast.searchToken(TBaseType.rrw_where, 1); 1262 if (stMatch != null) { 1263 ast.tokencode = TBaseType.ident; 1264 } 1265 } else if (ast.tokencode == TBaseType.rrw_value_oracle) { 1266 TSourceToken stBy = ast.searchToken(TBaseType.rrw_by, -1); 1267 if (stBy != null) { 1268 ast.tokencode = TBaseType.rrw_value_after_by; 1269 } 1270 } else if (ast.tokencode == TBaseType.rrw_new_oracle) { 1271 TSourceToken stRightParen = ast.searchToken(')', -1); 1272 if (stRightParen != null) { 1273 ast.tokencode = TBaseType.ident; 1274 } 1275 TSourceToken stDot = ast.searchToken('.', 1); 1276 if (stDot != null) { 1277 ast.tokencode = TBaseType.ident; 1278 } 
1279 1280 TSourceToken stNext = ast.searchTokenAfterObjectName(); 1281 stDot = ast.searchToken('.', 1); 1282 if ((stDot == null) && (stNext != null) && (stNext.tokencode == '(')) { 1283 ast.tokencode = TBaseType.rrw_oracle_new_constructor; 1284 } 1285 } else if (ast.tokencode == TBaseType.rrw_chr_oracle) { 1286 TSourceToken stLeftParen = ast.searchToken('(', 1); 1287 if (stLeftParen == null) { 1288 ast.tokencode = TBaseType.ident; 1289 } 1290 } else if (ast.tokencode == TBaseType.rrw_log_oracle) { 1291 TSourceToken stNext = ast.searchToken(TBaseType.rrw_errors_oracle, 1); 1292 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_view, -1); 1293 if (stPrev == null) { 1294 stPrev = ast.searchToken(TBaseType.rrw_oracle_supplemental, -1); 1295 } 1296 if (stPrev == null) { 1297 stPrev = ast.searchToken(1223, -1); // RW_ADVANCED: keep LOG as keyword in ADVANCED LOG 1298 } 1299 if ((stNext == null) && (stPrev == null)) { 1300 ast.tokencode = TBaseType.ident; 1301 } 1302 } else if (ast.tokencode == TBaseType.rrw_delete) { 1303 TSourceToken stPrev = ast.searchToken('.', -1); 1304 if (stPrev != null) { 1305 ast.tokencode = TBaseType.ident; 1306 } 1307 } else if (ast.tokencode == TBaseType.rrw_partition) { 1308 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_add, -1); 1309 if (stPrev != null) { 1310 stPrev.tokencode = TBaseType.rrw_add_p; 1311 } 1312 } else if (ast.tokencode == TBaseType.rrw_oracle_column) { 1313 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_oracle_modify, -1); 1314 if (stPrev != null) { 1315 ast.tokencode = TBaseType.rrw_oracle_column_after_modify; 1316 } 1317 } else if (ast.tokencode == TBaseType.rrw_oracle_apply) { 1318 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_outer, -1); 1319 if (stPrev != null) { 1320 stPrev.tokencode = TBaseType.ORACLE_OUTER2; 1321 } 1322 } else if (ast.tokencode == TBaseType.rrw_oracle_subpartition) { 1323 TSourceToken stNext = ast.searchToken("(", 2); 1324 if (stNext != null) { 1325 TSourceToken st1 = 
ast.nextSolidToken(); 1326 if (st1.toString().equalsIgnoreCase("template")) { 1327 // don't change, keep as RW_SUBPARTITION 1328 } else { 1329 ast.tokencode = TBaseType.rrw_oracle_subpartition_tablesample; 1330 } 1331 } 1332 } else if (ast.tokencode == TBaseType.rrw_primary) { 1333 TSourceToken stNext = ast.searchToken("key", 1); 1334 if (stNext == null) { 1335 ast.tokencode = TBaseType.ident; 1336 } 1337 } else if (ast.tokencode == TBaseType.rrw_oracle_offset) { 1338 TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_row, 2); 1339 if (stNext == null) { 1340 stNext = ast.searchToken(TBaseType.rrw_oracle_rows, 2); 1341 } 1342 if (stNext != null) { 1343 ast.tokencode = TBaseType.rrw_oracle_offset_row; 1344 } 1345 } else if (ast.tokencode == TBaseType.rrw_translate) { 1346 TSourceToken stNext = ast.searchToken("(", 2); 1347 if (stNext == null) { 1348 ast.tokencode = TBaseType.ident; 1349 } 1350 } else if (ast.tokencode == TBaseType.rrw_constraint) { 1351 TSourceToken stNext = ast.nextSolidToken(); 1352 if (stNext == null) { 1353 ast.tokencode = TBaseType.ident; 1354 } else { 1355 if (stNext.tokencode != TBaseType.ident) { 1356 ast.tokencode = TBaseType.ident; 1357 } 1358 } 1359 } else if (ast.tokencode == TBaseType.rrw_oracle_without) { 1360 TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_count, 1); 1361 if (stNext != null) { 1362 ast.tokencode = TBaseType.rrw_oracle_without_before_count; 1363 } 1364 } else if (ast.tokencode == TBaseType.rrw_bulk) { 1365 TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_collect, 1); 1366 if (stNext == null) { 1367 ast.tokencode = TBaseType.ident; 1368 } 1369 } else if (ast.tokencode == TBaseType.rrw_oracle_model) { 1370 TSourceToken stNext = ast.nextSolidToken(); 1371 if (stNext != null) { 1372 switch (stNext.toString().toUpperCase()) { 1373 case "RETURN": 1374 case "REFERENCE": 1375 case "IGNORE": 1376 case "KEEP": 1377 case "UNIQUE": 1378 case "PARTITION": 1379 case "DIMENSION": 1380 case "MEASURES": 1381 
case "RULES": 1382 ast.tokencode = TBaseType.rrw_oracle_model_in_model_clause; 1383 break; 1384 default: 1385 ; 1386 } 1387 } 1388 } 1389 } 1390 1391 private void appendToken(TCustomSqlStatement statement, TSourceToken token) { 1392 if (statement == null || token == null) { 1393 return; 1394 } 1395 token.stmt = statement; 1396 statement.sourcetokenlist.add(token); 1397 } 1398 1399 // ========== Error Handling and Recovery ========== 1400 1401 /** 1402 * Find all syntax errors in PL/SQL statements recursively. 1403 * Extracted from TGSqlParser.findAllSyntaxErrorsInPlsql(). 1404 */ 1405 private void findAllSyntaxErrorsInPlsql(TCustomSqlStatement psql) { 1406 if (psql.getErrorCount() > 0) { 1407 copyErrorsFromStatement(psql); 1408 } 1409 1410 for (int k = 0; k < psql.getStatements().size(); k++) { 1411 findAllSyntaxErrorsInPlsql(psql.getStatements().get(k)); 1412 } 1413 } 1414 1415 /** 1416 * Handle error recovery for CREATE TABLE/INDEX statements. 1417 * Dameng allows table properties that may not be fully parsed. 1418 * This method marks unparseable properties as SQL*Plus commands to skip them. 
1419 * 1420 * <p>Extracted from TGSqlParser.doparse() lines 16916-16971 1421 */ 1422 private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) { 1423 if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable) || 1424 (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex)) && 1425 (!TBaseType.c_createTableStrictParsing)) { 1426 1427 // Find the closing parenthesis of table definition 1428 int nested = 0; 1429 boolean isIgnore = false, isFoundIgnoreToken = false; 1430 TSourceToken firstIgnoreToken = null; 1431 1432 for (int k = 0; k < stmt.sourcetokenlist.size(); k++) { 1433 TSourceToken st = stmt.sourcetokenlist.get(k); 1434 1435 if (isIgnore) { 1436 if (st.issolidtoken() && (st.tokencode != ';')) { 1437 isFoundIgnoreToken = true; 1438 if (firstIgnoreToken == null) { 1439 firstIgnoreToken = st; 1440 } 1441 } 1442 if (st.tokencode != ';') { 1443 st.tokencode = TBaseType.sqlpluscmd; 1444 } 1445 continue; 1446 } 1447 1448 if (st.tokencode == (int) ')') { 1449 nested--; 1450 if (nested == 0) { 1451 // Check if next token is "AS ( SELECT" 1452 boolean isSelect = false; 1453 TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1); 1454 if (st1 != null) { 1455 TSourceToken st2 = st.searchToken((int) '(', 2); 1456 if (st2 != null) { 1457 TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3); 1458 isSelect = (st3 != null); 1459 } 1460 } 1461 if (!isSelect) isIgnore = true; 1462 } 1463 } 1464 1465 if ((st.tokencode == (int) '(') || (st.tokencode == TBaseType.left_parenthesis_2)) { 1466 nested++; 1467 } 1468 } 1469 1470 // Verify it's a valid Dameng table property 1471 if ((firstIgnoreToken != null) && 1472 (!TBaseType.searchOracleTablePros(firstIgnoreToken.toString()))) { 1473 // Not a valid property, keep the error 1474 isFoundIgnoreToken = false; 1475 } 1476 1477 // Retry parsing if we found ignoreable properties 1478 if (isFoundIgnoreToken) { 1479 stmt.clearError(); 1480 stmt.parsestatement(null, false); 1481 } 1482 } 1483 } 1484 1485 /** 1486 * 
Copy syntax errors from a statement to our error list. 1487 * Extracted from TGSqlParser.copyerrormsg(). 1488 */ 1489 1490 @Override 1491 public String toString() { 1492 return "DamengSqlParser{vendor=" + vendor + "}"; 1493 } 1494 1495 // ========== Main Dameng Tokenization ========== 1496 // Core tokenization logic extracted from TGSqlParser.dodamengsqltexttotokenlist() 1497 1498 /** 1499 * Perform Dameng-specific tokenization with SQL*Plus command detection. 1500 * <p> 1501 * This method implements Dameng's complex tokenization rules including: 1502 * <ul> 1503 * <li>SQL*Plus command detection (SPOOL, SET, START, etc.)</li> 1504 * <li>Forward slash disambiguation (division vs PL/SQL delimiter)</li> 1505 * <li>Dameng-specific keyword transformations (INNER, TYPE, FULL, etc.)</li> 1506 * <li>Context-dependent token code modifications</li> 1507 * </ul> 1508 * 1509 * <p><b>State Machine:</b> Uses 5 boolean flags to track tokenization state: 1510 * <ul> 1511 * <li>{@code insqlpluscmd} - Currently inside SQL*Plus command</li> 1512 * <li>{@code isvalidplace} - Valid place to start SQL*Plus command</li> 1513 * <li>{@code waitingreturnforfloatdiv} - Slash seen, waiting for newline</li> 1514 * <li>{@code waitingreturnforsemicolon} - Semicolon seen, waiting for newline</li> 1515 * <li>{@code continuesqlplusatnewline} - SQL*Plus command continues to next line</li> 1516 * </ul> 1517 * 1518 * <p><b>Extracted from:</b> TGSqlParser.dodamengsqltexttotokenlist() (lines 3931-4298) 1519 * 1520 * @throws RuntimeException if tokenization fails 1521 */ 1522 private void dodamengsqltexttotokenlist() { 1523 // Initialize state machine for SQL*Plus command detection 1524 insqlpluscmd = false; 1525 isvalidplace = true; 1526 waitingreturnforfloatdiv = false; 1527 waitingreturnforsemicolon = false; 1528 continuesqlplusatnewline = false; 1529 1530 ESqlPlusCmd currentCmdType = ESqlPlusCmd.spcUnknown; 1531 1532 TSourceToken lct = null, prevst = null; 1533 1534 TSourceToken asourcetoken, 
lcprevst; 1535 int yychar; 1536 1537 asourcetoken = getanewsourcetoken(); 1538 if (asourcetoken == null) return; 1539 yychar = asourcetoken.tokencode; 1540 1541 while (yychar > 0) { 1542 sourcetokenlist.add(asourcetoken); 1543 1544 switch (yychar) { 1545 case TBaseType.cmtdoublehyphen: 1546 case TBaseType.cmtslashstar: 1547 case TBaseType.lexspace: { 1548 if (insqlpluscmd) { 1549 asourcetoken.insqlpluscmd = true; 1550 } 1551 break; 1552 } 1553 1554 case TBaseType.lexnewline: { 1555 if (insqlpluscmd) { 1556 insqlpluscmd = false; 1557 isvalidplace = true; 1558 1559 if (continuesqlplusatnewline) { 1560 insqlpluscmd = true; 1561 isvalidplace = false; 1562 asourcetoken.insqlpluscmd = true; 1563 } 1564 1565 if (!insqlpluscmd) { 1566 currentCmdType = ESqlPlusCmd.spcUnknown; 1567 } 1568 } 1569 1570 if (waitingreturnforsemicolon) { 1571 isvalidplace = true; 1572 } 1573 1574 if (waitingreturnforfloatdiv) { 1575 isvalidplace = true; 1576 lct.tokencode = TBaseType.sqlpluscmd; 1577 if (lct.tokentype != ETokenType.ttslash) { 1578 lct.tokentype = ETokenType.ttsqlpluscmd; 1579 } 1580 } 1581 1582 if (countLines(asourcetoken.toString()) > 1) { 1583 // There is a line after select, so spool is the right place to start a sqlplus command 1584 isvalidplace = true; 1585 } 1586 1587 flexer.insqlpluscmd = insqlpluscmd; 1588 break; 1589 } 1590 1591 default: { 1592 // Solid token 1593 // Save semicolon flag before clearing: slash after semicolon on 1594 // the same line (e.g. "END; /") should be a SQL*Plus delimiter, 1595 // not division. 
1596 boolean prevWasSemicolon = waitingreturnforsemicolon; 1597 continuesqlplusatnewline = false; 1598 waitingreturnforsemicolon = false; 1599 waitingreturnforfloatdiv = false; 1600 1601 if (insqlpluscmd) { 1602 asourcetoken.insqlpluscmd = true; 1603 if (asourcetoken.toString().equalsIgnoreCase("-")) { 1604 continuesqlplusatnewline = true; 1605 } 1606 } else { 1607 if (asourcetoken.tokentype == ETokenType.ttsemicolon) { 1608 waitingreturnforsemicolon = true; 1609 } 1610 1611 if ((asourcetoken.tokentype == ETokenType.ttslash) 1612 && (isvalidplace || prevWasSemicolon || (isValidPlaceForDivToSqlplusCmd(sourcetokenlist, asourcetoken.posinlist)))) { 1613 lct = asourcetoken; 1614 waitingreturnforfloatdiv = true; 1615 } 1616 1617 currentCmdType = TSqlplusCmdStatement.searchCmd(asourcetoken.toString(), asourcetoken.nextToken()); 1618 if (currentCmdType != ESqlPlusCmd.spcUnknown) { 1619 if (isvalidplace) { 1620 TSourceToken lnbreak = null; 1621 boolean aRealSqlplusCmd = true; 1622 if (sourcetokenlist.curpos > 0) { 1623 lnbreak = sourcetokenlist.get(sourcetokenlist.curpos - 1); 1624 aRealSqlplusCmd = !spaceAtTheEndOfReturnToken(lnbreak.toString()); 1625 } 1626 1627 if (aRealSqlplusCmd) { 1628 asourcetoken.prevTokenCode = asourcetoken.tokencode; 1629 asourcetoken.tokencode = TBaseType.sqlpluscmd; 1630 if (asourcetoken.tokentype != ETokenType.ttslash) { 1631 asourcetoken.tokentype = ETokenType.ttsqlpluscmd; 1632 } 1633 insqlpluscmd = true; 1634 flexer.insqlpluscmd = insqlpluscmd; 1635 } 1636 } else if ((asourcetoken.tokencode == TBaseType.rrw_connect) && (sourcetokenlist.returnbeforecurtoken(true))) { 1637 asourcetoken.tokencode = TBaseType.sqlpluscmd; 1638 if (asourcetoken.tokentype != ETokenType.ttslash) { 1639 asourcetoken.tokentype = ETokenType.ttsqlpluscmd; 1640 } 1641 insqlpluscmd = true; 1642 flexer.insqlpluscmd = insqlpluscmd; 1643 } else if (sourcetokenlist.returnbeforecurtoken(true)) { 1644 TSourceToken lnbreak = sourcetokenlist.get(sourcetokenlist.curpos - 1); 1645 
1646 if ((countLines(lnbreak.toString()) > 1) && (!spaceAtTheEndOfReturnToken(lnbreak.toString()))) { 1647 asourcetoken.tokencode = TBaseType.sqlpluscmd; 1648 if (asourcetoken.tokentype != ETokenType.ttslash) { 1649 asourcetoken.tokentype = ETokenType.ttsqlpluscmd; 1650 } 1651 insqlpluscmd = true; 1652 flexer.insqlpluscmd = insqlpluscmd; 1653 } 1654 } 1655 } 1656 } 1657 1658 isvalidplace = false; 1659 1660 // Dameng-specific keyword handling (inline to match legacy behavior) 1661 if (prevst != null) { 1662 if (prevst.tokencode == TBaseType.rrw_inner) { 1663 if (asourcetoken.tokencode != flexer.getkeywordvalue("JOIN")) { 1664 prevst.tokencode = TBaseType.ident; 1665 } 1666 } else if ((prevst.tokencode == TBaseType.rrw_not) 1667 && (asourcetoken.tokencode == flexer.getkeywordvalue("DEFERRABLE"))) { 1668 prevst.tokencode = flexer.getkeywordvalue("NOT_DEFERRABLE"); 1669 } 1670 } 1671 1672 if (asourcetoken.tokencode == TBaseType.rrw_inner) { 1673 prevst = asourcetoken; 1674 } else if (asourcetoken.tokencode == TBaseType.rrw_not) { 1675 prevst = asourcetoken; 1676 } else { 1677 prevst = null; 1678 } 1679 1680 // Dameng keyword transformations that rely on prev token state 1681 if ((asourcetoken.tokencode == flexer.getkeywordvalue("DIRECT_LOAD")) 1682 || (asourcetoken.tokencode == flexer.getkeywordvalue("ALL"))) { 1683 lcprevst = getprevsolidtoken(asourcetoken); 1684 if (lcprevst != null) { 1685 if (lcprevst.tokencode == TBaseType.rrw_for) 1686 lcprevst.tokencode = TBaseType.rw_for1; 1687 } 1688 } else if (asourcetoken.tokencode == TBaseType.rrw_dense_rank) { 1689 TSourceToken stKeep = asourcetoken.searchToken(TBaseType.rrw_keep, -2); 1690 if (stKeep != null) { 1691 stKeep.tokencode = TBaseType.rrw_keep_before_dense_rank; 1692 } 1693 } else if (asourcetoken.tokencode == TBaseType.rrw_full) { 1694 TSourceToken stMatch = asourcetoken.searchToken(TBaseType.rrw_match, -1); 1695 if (stMatch != null) { 1696 asourcetoken.tokencode = TBaseType.RW_FULL2; 1697 } 1698 } else if 
(asourcetoken.tokencode == TBaseType.rrw_join) { 1699 TSourceToken stFull = asourcetoken.searchToken(TBaseType.rrw_full, -1); 1700 if (stFull != null) { 1701 stFull.tokencode = TBaseType.RW_FULL2; 1702 } else { 1703 TSourceToken stNatural = asourcetoken.searchToken(TBaseType.rrw_natural, -4); 1704 if (stNatural != null) { 1705 stNatural.tokencode = TBaseType.RW_NATURAL2; 1706 } 1707 } 1708 } else if (asourcetoken.tokencode == TBaseType.rrw_outer) { 1709 TSourceToken stFull = asourcetoken.searchToken(TBaseType.rrw_full, -1); 1710 if (stFull != null) { 1711 stFull.tokencode = TBaseType.RW_FULL2; 1712 } 1713 } else if (asourcetoken.tokencode == TBaseType.rrw_is) { 1714 TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2); 1715 if (stType != null) { 1716 stType.tokencode = TBaseType.rrw_type2; 1717 } 1718 } else if (asourcetoken.tokencode == TBaseType.rrw_as) { 1719 TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2); 1720 if (stType != null) { 1721 stType.tokencode = TBaseType.rrw_type2; 1722 } 1723 } else if (asourcetoken.tokencode == TBaseType.rrw_oid) { 1724 TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2); 1725 if (stType != null) { 1726 stType.tokencode = TBaseType.rrw_type2; 1727 } 1728 } else if (asourcetoken.tokencode == TBaseType.rrw_type) { 1729 TSourceToken stPrev; 1730 stPrev = asourcetoken.searchToken(TBaseType.rrw_drop, -1); 1731 if (stPrev != null) { 1732 asourcetoken.tokencode = TBaseType.rrw_type2; 1733 } 1734 if (asourcetoken.tokencode == TBaseType.rrw_type) { 1735 stPrev = asourcetoken.searchToken(TBaseType.rrw_of, -1); 1736 if (stPrev != null) { 1737 asourcetoken.tokencode = TBaseType.rrw_type2; 1738 } 1739 } 1740 if (asourcetoken.tokencode == TBaseType.rrw_type) { 1741 stPrev = asourcetoken.searchToken(TBaseType.rrw_create, -1); 1742 if (stPrev != null) { 1743 asourcetoken.tokencode = TBaseType.rrw_type2; 1744 } 1745 } 1746 if (asourcetoken.tokencode == TBaseType.rrw_type) { 1747 stPrev = 
asourcetoken.searchToken(TBaseType.rrw_replace, -1); 1748 if (stPrev != null) { 1749 asourcetoken.tokencode = TBaseType.rrw_type2; 1750 } 1751 } 1752 if (asourcetoken.tokencode == TBaseType.rrw_type) { 1753 stPrev = asourcetoken.searchToken('%', -1); 1754 if (stPrev != null) { 1755 asourcetoken.tokencode = TBaseType.rrw_type2; 1756 } 1757 } 1758 } else if ((asourcetoken.tokencode == TBaseType.rrw_by) || (asourcetoken.tokencode == TBaseType.rrw_to)) { 1759 lcprevst = getprevsolidtoken(asourcetoken); 1760 if (lcprevst != null) { 1761 if ((lcprevst.tokencode == TBaseType.sqlpluscmd) && (lcprevst.toString().equalsIgnoreCase("connect"))) { 1762 lcprevst.tokencode = TBaseType.rrw_connect; 1763 lcprevst.tokentype = ETokenType.ttkeyword; 1764 flexer.insqlpluscmd = false; 1765 1766 continuesqlplusatnewline = false; 1767 waitingreturnforsemicolon = false; 1768 waitingreturnforfloatdiv = false; 1769 isvalidplace = false; 1770 insqlpluscmd = false; 1771 } 1772 } 1773 } else if (asourcetoken.tokencode == TBaseType.rrw_with) { 1774 lcprevst = getprevsolidtoken(asourcetoken); 1775 if (lcprevst != null) { 1776 if ((lcprevst.tokencode == TBaseType.sqlpluscmd) && (lcprevst.toString().equalsIgnoreCase("start"))) { 1777 lcprevst.tokencode = TBaseType.rrw_start; 1778 lcprevst.tokentype = ETokenType.ttkeyword; 1779 flexer.insqlpluscmd = false; 1780 1781 continuesqlplusatnewline = false; 1782 waitingreturnforsemicolon = false; 1783 waitingreturnforfloatdiv = false; 1784 isvalidplace = false; 1785 insqlpluscmd = false; 1786 } 1787 } 1788 } else if (asourcetoken.tokencode == TBaseType.rrw_set) { 1789 lcprevst = getprevsolidtoken(asourcetoken); 1790 if (lcprevst != null) { 1791 if (lcprevst.getAstext().equalsIgnoreCase("a")) { 1792 TSourceToken lcpp = getprevsolidtoken(lcprevst); 1793 if (lcpp != null) { 1794 if ((lcpp.tokencode == TBaseType.rrw_not) || (lcpp.tokencode == TBaseType.rrw_is)) { 1795 lcprevst.tokencode = TBaseType.rrw_oracle_a_in_aset; 1796 asourcetoken.tokencode = 
TBaseType.rrw_oracle_set_in_aset; 1797 } 1798 } 1799 } 1800 } 1801 } 1802 1803 break; 1804 } 1805 } 1806 1807 // Get next token 1808 asourcetoken = getanewsourcetoken(); 1809 if (asourcetoken != null) { 1810 yychar = asourcetoken.tokencode; 1811 1812 // Handle special case: dot after SQL*Plus commands 1813 if ((asourcetoken.tokencode == '.') && (getprevsolidtoken(asourcetoken) != null) 1814 && ((currentCmdType == ESqlPlusCmd.spcAppend) 1815 || (currentCmdType == ESqlPlusCmd.spcChange) || (currentCmdType == ESqlPlusCmd.spcInput) 1816 || (currentCmdType == ESqlPlusCmd.spcList) || (currentCmdType == ESqlPlusCmd.spcRun))) { 1817 // a.ent_rp_usr_id is not a real sqlplus command 1818 TSourceToken lcprevst2 = getprevsolidtoken(asourcetoken); 1819 lcprevst2.insqlpluscmd = false; 1820 if (lcprevst2.prevTokenCode != 0) { 1821 lcprevst2.tokencode = lcprevst2.prevTokenCode; 1822 } else { 1823 lcprevst2.tokencode = TBaseType.ident; 1824 } 1825 1826 flexer.insqlpluscmd = false; 1827 continuesqlplusatnewline = false; 1828 waitingreturnforsemicolon = false; 1829 waitingreturnforfloatdiv = false; 1830 isvalidplace = false; 1831 insqlpluscmd = false; 1832 } 1833 } else { 1834 yychar = 0; 1835 1836 if (waitingreturnforfloatdiv) { 1837 // / at the end of line treat as sqlplus command 1838 lct.tokencode = TBaseType.sqlpluscmd; 1839 if (lct.tokentype != ETokenType.ttslash) { 1840 lct.tokentype = ETokenType.ttsqlpluscmd; 1841 } 1842 } 1843 } 1844 1845 if ((yychar == 0) && (prevst != null)) { 1846 if (prevst.tokencode == TBaseType.rrw_inner) { 1847 prevst.tokencode = TBaseType.ident; 1848 } 1849 } 1850 } 1851 } 1852 1853 // ========== Helper Methods for Tokenization ========== 1854 // These methods support Dameng-specific tokenization logic 1855 1856 /** 1857 * Count number of newlines in a string. 
1858 * 1859 * @param s string to analyze 1860 * @return number of line breaks (LF or CR) 1861 */ 1862 private int countLines(String s) { 1863 int pos = 0, lf = 0, cr = 0; 1864 1865 while (pos < s.length()) { 1866 if (s.charAt(pos) == '\r') { 1867 cr++; 1868 pos++; 1869 continue; 1870 } 1871 if (s.charAt(pos) == '\n') { 1872 lf++; 1873 pos++; 1874 continue; 1875 } 1876 1877 if (s.charAt(pos) == ' ') { 1878 pos++; 1879 continue; 1880 } 1881 break; 1882 } 1883 1884 if (lf >= cr) return lf; 1885 else return cr; 1886 } 1887 1888 /** 1889 * Check if return token ends with space or tab. 1890 * 1891 * @param s token text 1892 * @return true if ends with space/tab 1893 */ 1894 private boolean spaceAtTheEndOfReturnToken(String s) { 1895 if (s == null) return false; 1896 if (s.length() == 0) return false; 1897 1898 return ((s.charAt(s.length() - 1) == ' ') || (s.charAt(s.length() - 1) == '\t')); 1899 } 1900 1901 /** 1902 * Determine if forward slash should be treated as SQL*Plus command delimiter. 1903 * <p> 1904 * Dameng uses '/' as both division operator and SQL*Plus block delimiter. 1905 * This method disambiguates by checking if the '/' appears at the beginning 1906 * of a line (after a return token without trailing whitespace). 
1907 * 1908 * @param pstlist token list 1909 * @param pPos position of '/' token 1910 * @return true if '/' should be SQL*Plus command 1911 */ 1912 private boolean isValidPlaceForDivToSqlplusCmd(TSourceTokenList pstlist, int pPos) { 1913 boolean ret = false; 1914 1915 if ((pPos <= 0) || (pPos > pstlist.size() - 1)) return ret; 1916 1917 // Token directly before div must be ttreturn without space appending it 1918 gudusoft.gsqlparser.TSourceToken lcst = pstlist.get(pPos - 1); 1919 if (lcst.tokentype != gudusoft.gsqlparser.ETokenType.ttreturn) { 1920 return ret; 1921 } 1922 1923 if (!(lcst.getAstext().charAt(lcst.getAstext().length() - 1) == ' ')) { 1924 ret = true; 1925 } 1926 1927 return ret; 1928 } 1929 1930 /** 1931 * Get previous non-whitespace token. 1932 * 1933 * @param ptoken current token 1934 * @return previous solid token, or null 1935 */ 1936 private gudusoft.gsqlparser.TSourceToken getprevsolidtoken(gudusoft.gsqlparser.TSourceToken ptoken) { 1937 gudusoft.gsqlparser.TSourceToken ret = null; 1938 TSourceTokenList lctokenlist = ptoken.container; 1939 1940 if (lctokenlist != null) { 1941 if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) { 1942 if (!( 1943 (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttwhitespace) 1944 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttreturn) 1945 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttsimplecomment) 1946 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttbracketedcomment) 1947 )) { 1948 ret = lctokenlist.get(ptoken.posinlist - 1); 1949 } else { 1950 ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false); 1951 } 1952 } 1953 } 1954 return ret; 1955 } 1956}