001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerOracle; 009import gudusoft.gsqlparser.TParserOracleSql; 010import gudusoft.gsqlparser.TParserOraclePLSql; 011import gudusoft.gsqlparser.TSourceToken; 012import gudusoft.gsqlparser.TSourceTokenList; 013import gudusoft.gsqlparser.TStatementList; 014import gudusoft.gsqlparser.TSyntaxError; 015import gudusoft.gsqlparser.EFindSqlStateType; 016import gudusoft.gsqlparser.ESqlPlusCmd; 017import gudusoft.gsqlparser.ETokenType; 018import gudusoft.gsqlparser.ETokenStatus; 019import gudusoft.gsqlparser.ESqlStatementType; 020import gudusoft.gsqlparser.EErrorType; 021import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement; 022import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 023import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 024import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 025import gudusoft.gsqlparser.compiler.TContext; 026import gudusoft.gsqlparser.sqlenv.TSQLEnv; 027import gudusoft.gsqlparser.compiler.TGlobalScope; 028import gudusoft.gsqlparser.compiler.TFrame; 029import gudusoft.gsqlparser.resolver.TSQLResolver; 030import gudusoft.gsqlparser.TLog; 031import gudusoft.gsqlparser.TGSqlParser; 032import gudusoft.gsqlparser.compiler.TASTEvaluator; 033import java.util.Stack; 034 035import java.io.BufferedReader; 036import java.util.ArrayList; 037import java.util.List; 038 039/** 040 * Oracle database SQL parser implementation. 
 *
 * <p>This parser handles Oracle-specific SQL syntax including:
 * <ul>
 * <li>PL/SQL blocks (procedures, functions, packages, triggers)</li>
 * <li>SQL*Plus commands (spool, set, show, etc.)</li>
 * <li>Oracle-specific DML/DDL (MERGE, flashback, etc.)</li>
 * <li>Oracle analytical functions and extensions</li>
 * <li>Special token handling (INNER, NOT DEFERRABLE, etc.)</li>
 * </ul>
 *
 * <p><b>Implementation Status:</b> PHASE 3 - IN PROGRESS
 * <ul>
 * <li><b>Completed:</b> Oracle classes (TLexerOracle, TParserOracleSql, TParserOraclePLSql) are now PUBLIC</li>
 * <li><b>Current:</b> Skeleton implementation delegates to legacy TGSqlParser</li>
 * <li><b>Next:</b> Extract vendor-specific logic from TGSqlParser into this class</li>
 * <li><b>Goal:</b> Fully self-contained Oracle parser using AbstractSqlParser template</li>
 * </ul>
 *
 * <p><b>Design Notes:</b>
 * <ul>
 * <li>Extends {@link AbstractSqlParser} and overrides its vendor hooks (tokenization,
 *     raw statement extraction, parsing)</li>
 * <li>Can now directly instantiate: {@link TLexerOracle}, {@link TParserOracleSql}, {@link TParserOraclePLSql}</li>
 * <li>Uses two parsers: TParserOracleSql (SQL) + TParserOraclePLSql (PL/SQL blocks)</li>
 * <li>Handles SQL*Plus commands via special tokenization logic</li>
 * <li>Delimiter character: '/' for PL/SQL blocks, ';' for SQL statements</li>
 * </ul>
 *
 * <p><b>Usage Example:</b>
 * <pre>
 * // Get Oracle parser from factory
 * SqlParser parser = SqlParserFactory.get(EDbVendor.dbvoracle);
 *
 * // Build context
 * ParserContext context = new ParserContext.Builder(EDbVendor.dbvoracle)
 *     .sqlText("SELECT * FROM emp WHERE deptno = 10")
 *     .build();
 *
 * // Parse
 * SqlParseResult result = parser.parse(context);
 *
 * // Access statements
 * TStatementList statements = result.getSqlStatements();
 * </pre>
 *
 * <p><b>Phase 3 Extraction Roadmap:</b>
 *
<ol> 087 * <li>✅ DONE: Make TLexerOracle, TParserOracleSql, TParserOraclePLSql public</li> 088 * <li>⏳ TODO: Extract tokenization logic (~367 lines from TGSqlParser.dooraclesqltexttotokenlist())</li> 089 * <li>⏳ TODO: Extract raw statement logic (~200 lines from TGSqlParser.dooraclegetrawsqlstatements())</li> 090 * <li>⏳ TODO: Extract parsing orchestration (SQL vs PL/SQL parser selection)</li> 091 * <li>⏳ TODO: Extract helper methods (getanewsourcetoken, getprevsolidtoken, etc.)</li> 092 * <li>⏳ TODO: Extend AbstractSqlParser and use template method pattern fully</li> 093 * <li>⏳ TODO: Remove all delegation to TGSqlParser</li> 094 * </ol> 095 * 096 * <p><b>Key Methods to Extract from TGSqlParser:</b> 097 * <ul> 098 * <li>{@code dooraclesqltexttotokenlist()} - Oracle tokenization with SQL*Plus command detection</li> 099 * <li>{@code dooraclegetrawsqlstatements()} - Oracle raw statement boundaries (handles PL/SQL blocks)</li> 100 * <li>{@code getanewsourcetoken()} - Token iterator from lexer</li> 101 * <li>{@code getprevsolidtoken()} - Navigate token list backwards</li> 102 * <li>{@code IsValidPlaceForDivToSqlplusCmd()} - Slash vs divide operator disambiguation</li> 103 * <li>{@code countLines()} - Multi-line token handling</li> 104 * <li>{@code spaceAtTheEndOfReturnToken()} - SQL*Plus command validation</li> 105 * </ul> 106 * 107 * @see SqlParser 108 * @see AbstractSqlParser 109 * @see TLexerOracle 110 * @see TParserOracleSql 111 * @see TParserOraclePLSql 112 * @since 3.2.0.0 113 */ 114public class OracleSqlParser extends AbstractSqlParser { 115 116 /** 117 * Construct Oracle SQL parser. 118 * <p> 119 * Configures the parser for Oracle database with default delimiters: 120 * <ul> 121 * <li>SQL statements: semicolon (;)</li> 122 * <li>PL/SQL blocks: forward slash (/)</li> 123 * </ul> 124 * <p> 125 * Following the original TGSqlParser pattern, the lexer and parsers are 126 * created once in the constructor and reused for all parsing operations. 
127 * This avoids unnecessary object allocation overhead since the parser 128 * is not thread-safe and designed for single-use per instance. 129 */ 130 public OracleSqlParser() { 131 super(EDbVendor.dbvoracle); 132 this.delimiterChar = '/'; // PL/SQL delimiter 133 this.defaultDelimiterStr = ";"; // SQL delimiter 134 135 // Create lexer once - will be reused for all parsing operations 136 // (matches original TGSqlParser constructor pattern at line 1033) 137 this.flexer = new TLexerOracle(); 138 this.flexer.delimiterchar = this.delimiterChar; 139 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 140 141 // Set parent's lexer reference for shared tokenization logic 142 this.lexer = this.flexer; 143 144 // Create parsers once - will be reused for all parsing operations 145 // Token list will be set/updated when parsing begins 146 // (matches original TGSqlParser constructor pattern at lines 1036-1040) 147 this.fparser = new TParserOracleSql(null); 148 this.fplsqlparser = new TParserOraclePLSql(null); 149 this.fparser.lexer = this.flexer; 150 this.fplsqlparser.lexer = this.flexer; 151 152 // NOTE: sourcetokenlist and sqlstatements are initialized in AbstractSqlParser constructor 153 } 154 155 // ========== Tokenization State (used during tokenization) ========== 156 // These instance variables are used during the tokenization process 157 // and are set up at the beginning of tokenization 158 159 /** The Oracle lexer used for tokenization */ 160 public TLexerOracle flexer; // Package-accessible for TGSqlParser integration 161 162 // NOTE: sourcetokenlist moved to AbstractSqlParser (inherited) 163 164 /** Optional callback for token processing (can be null) */ 165 private Object tokenHandle; // TTokenCallback interface - keeping as Object for now 166 167 // State variables for tokenization (set during dooraclesqltexttotokenlist()) 168 private boolean continuesqlplusatnewline; 169 private boolean waitingreturnforsemicolon; 170 private boolean 
waitingreturnforfloatdiv; 171 private boolean isvalidplace; 172 private boolean insqlpluscmd; 173 174 // ========== Statement Parsing State (used during statement parsing) ========== 175 // These instance variables are used during the statement parsing process 176 177 // NOTE: The following fields moved to AbstractSqlParser (inherited): 178 // - sqlcmds (ISqlCmds) 179 // - sqlstatements (TStatementList) 180 // - parserContext (ParserContext) 181 182 /** Current statement being built */ 183 private TCustomSqlStatement gcurrentsqlstatement; 184 185 /** SQL parser (for regular SQL statements) */ 186 private TParserOracleSql fparser; 187 188 /** PL/SQL parser (for PL/SQL blocks) */ 189 private TParserOraclePLSql fplsqlparser; 190 191 // Note: Global context and frame stack fields inherited from AbstractSqlParser: 192 // - protected TContext globalContext 193 // - protected TSQLEnv sqlEnv 194 // - protected Stack<TFrame> frameStack 195 // - protected TFrame globalFrame 196 197 // ========== Enums for State Machine ========== 198 // These enums are used by the dooraclegetrawsqlstatements state machine 199 200 enum stored_procedure_status {start,is_as,body,bodyend,end, cursor_declare}; 201 enum stored_procedure_type {function,procedure,package_spec,package_body, block_with_begin,block_with_declare, 202 create_trigger,create_library,cursor_in_package_spec,others}; 203 204 static final int stored_procedure_nested_level = 1024; 205 206 // ========== AbstractSqlParser Abstract Methods Implementation ========== 207 208 /** 209 * Return the Oracle lexer instance. 210 * <p> 211 * The lexer is created once in the constructor and reused for all 212 * parsing operations. This method simply returns the existing instance, 213 * matching the original TGSqlParser pattern where the lexer is created 214 * once and reset before each use. 
215 * 216 * @param context parser context (not used, lexer already created) 217 * @return the Oracle lexer instance created in constructor 218 */ 219 @Override 220 protected TCustomLexer getLexer(ParserContext context) { 221 // Return existing lexer instance (created in constructor) 222 // No need to create new instance - matches original TGSqlParser pattern 223 return this.flexer; 224 } 225 226 /** 227 * Return the Oracle SQL parser instance with updated token list. 228 * <p> 229 * The parser is created once in the constructor and reused for all 230 * parsing operations. This method updates the token list and returns 231 * the existing instance, matching the original TGSqlParser pattern. 232 * 233 * @param context parser context (not used, parser already created) 234 * @param tokens source token list to parse 235 * @return the Oracle SQL parser instance created in constructor 236 */ 237 @Override 238 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 239 // Update token list for reused parser instance 240 this.fparser.sourcetokenlist = tokens; 241 return this.fparser; 242 } 243 244 /** 245 * Return the Oracle PL/SQL parser instance with updated token list. 246 * <p> 247 * Oracle needs a secondary parser (TParserOraclePLSql) for PL/SQL blocks 248 * (procedures, functions, packages, triggers, anonymous blocks). 249 * <p> 250 * The parser is created once in the constructor and reused for all 251 * parsing operations. This method updates the token list and returns 252 * the existing instance, matching the original TGSqlParser pattern. 
253 * 254 * @param context parser context (not used, parser already created) 255 * @param tokens source token list to parse 256 * @return the Oracle PL/SQL parser instance created in constructor 257 */ 258 @Override 259 protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) { 260 // Update token list for reused parser instance 261 this.fplsqlparser.sourcetokenlist = tokens; 262 return this.fplsqlparser; 263 } 264 265 /** 266 * Call Oracle-specific tokenization logic. 267 * <p> 268 * Delegates to dooraclesqltexttotokenlist which handles Oracle's 269 * specific keyword recognition, SQL*Plus commands, forward slash 270 * disambiguation, and token generation. 271 */ 272 @Override 273 protected void tokenizeVendorSql() { 274 dooraclesqltexttotokenlist(); 275 } 276 277 /** 278 * Post-tokenization: merge ${...} template variable tokens into single IDENT tokens. 279 * Template syntax like ${if(len(X) == 0, "", "...")} is used by BI tools. 280 */ 281 @Override 282 protected void doAfterTokenize(TSourceTokenList tokens) { 283 super.doAfterTokenize(tokens); 284 mergeTemplateVariableTokens(tokens); 285 } 286 287 private void mergeTemplateVariableTokens(TSourceTokenList tokens) { 288 for (int i = 0; i < tokens.size() - 1; i++) { 289 TSourceToken dollar = tokens.get(i); 290 291 // Match either bare '$' (self-char) or '$IDENT' like $P, $X (identifier starting with $) 292 boolean isDollarChar = (dollar.tokencode == '$'); 293 boolean isDollarIdent = (dollar.tokencode == TBaseType.ident 294 && dollar.astext != null && dollar.astext.startsWith("$")); 295 if (!isDollarChar && !isDollarIdent) continue; 296 297 // Find next non-whitespace token — for $IDENT pattern, require immediate '{' (no whitespace) 298 int braceIdx = i + 1; 299 if (isDollarChar) { 300 while (braceIdx < tokens.size() && tokens.get(braceIdx).tokentype == ETokenType.ttwhitespace) { 301 braceIdx++; 302 } 303 } 304 if (braceIdx >= tokens.size() || tokens.get(braceIdx).tokencode != 
'{') continue; 305 306 // Found ${ pattern — find matching } with depth tracking 307 int depth = 1; 308 int endIdx = braceIdx + 1; 309 boolean isComplex = false; 310 while (endIdx < tokens.size() && depth > 0) { 311 int code = tokens.get(endIdx).tokencode; 312 if (code == '{') depth++; 313 else if (code == '}') depth--; 314 else if (code == '(' || code == ',' || code == '\'' || code == '"') isComplex = true; 315 if (depth > 0) endIdx++; 316 } 317 if (depth != 0) continue; // unclosed 318 319 // Build merged token text 320 StringBuilder sb = new StringBuilder(); 321 for (int j = i; j <= endIdx; j++) { 322 sb.append(tokens.get(j).astext); 323 } 324 325 if (isComplex && isDollarChar) { 326 // Complex template starting with bare $ like ${if(len(X)==0,...)} 327 // These expand to SQL fragments (e.g., AND clauses) at runtime, 328 // so convert to whitespace to let parser skip them entirely. 329 for (int j = i; j <= endIdx; j++) { 330 tokens.get(j).tokentype = ETokenType.ttwhitespace; 331 tokens.get(j).tokencode = TBaseType.lexspace; 332 } 333 } else { 334 // Simple template like ${NAME}, or JasperReports $P{VAR}/$X{IN,COL,PARAM} 335 // These expand to single values/expressions, so merge into IDENT placeholder. 336 dollar.astext = sb.toString(); 337 dollar.tokencode = TBaseType.ident; 338 dollar.tokentype = ETokenType.ttidentifier; 339 // Convert remaining tokens to whitespace so parser skips them even if 340 // tokenstatus is overwritten by statement splitter (tsignoredbygetrawstatement) 341 for (int j = i + 1; j <= endIdx; j++) { 342 tokens.get(j).tokentype = ETokenType.ttwhitespace; 343 tokens.get(j).tokencode = TBaseType.lexspace; 344 } 345 } 346 347 i = endIdx; // skip past merged tokens 348 } 349 } 350 351 /** 352 * Setup Oracle parsers for raw statement extraction. 353 * <p> 354 * Oracle uses dual parsers (SQL + PL/SQL), so we inject sqlcmds and 355 * update token lists for both parsers. 
356 */ 357 @Override 358 protected void setupVendorParsersForExtraction() { 359 // Inject sqlcmds into BOTH parsers (SQL + PL/SQL) 360 this.fparser.sqlcmds = this.sqlcmds; 361 this.fplsqlparser.sqlcmds = this.sqlcmds; 362 363 // Update token list for BOTH parsers 364 this.fparser.sourcetokenlist = this.sourcetokenlist; 365 this.fplsqlparser.sourcetokenlist = this.sourcetokenlist; 366 } 367 368 /** 369 * Call Oracle-specific raw statement extraction logic. 370 * <p> 371 * Delegates to dooraclegetrawsqlstatements which handles Oracle's 372 * statement delimiters (semicolon and forward slash). 373 */ 374 @Override 375 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 376 dooraclegetrawsqlstatements(builder); 377 } 378 379 /** 380 * Perform full parsing of statements with syntax checking. 381 * <p> 382 * This method orchestrates the parsing of all statements by: 383 * <ul> 384 * <li>Using the raw statements passed from AbstractSqlParser.parse()</li> 385 * <li>Initializing SQL and PL/SQL parsers</li> 386 * <li>Creating global context and frame stack</li> 387 * <li>Looping through each raw statement</li> 388 * <li>Calling parsestatement() on each to build AST</li> 389 * <li>Handling error recovery for CREATE TABLE/INDEX</li> 390 * <li>Collecting syntax errors</li> 391 * </ul> 392 * 393 * <p><b>Important:</b> This method does NOT extract raw statements - they are 394 * passed in as a parameter already extracted by {@link #extractRawStatements}. 395 * This eliminates duplicate extraction that was occurring in the old design. 
396 * 397 * <p>Extracted from: TGSqlParser.doparse() lines 16903-17026 398 * 399 * @param context parser context 400 * @param parser main SQL parser (TParserOracleSql) 401 * @param secondaryParser PL/SQL parser (TParserOraclePLSql) 402 * @param tokens source token list 403 * @param rawStatements raw statements already extracted (never null) 404 * @return list of fully parsed statements with AST built 405 */ 406 @Override 407 protected TStatementList performParsing(ParserContext context, 408 TCustomParser parser, 409 TCustomParser secondaryParser, 410 TSourceTokenList tokens, 411 TStatementList rawStatements) { 412 // Store references 413 this.fparser = (TParserOracleSql) parser; 414 this.fplsqlparser = (TParserOraclePLSql) secondaryParser; 415 this.sourcetokenlist = tokens; 416 this.parserContext = context; 417 418 // Use the raw statements passed from AbstractSqlParser.parse() 419 // (already extracted - DO NOT re-extract to avoid duplication) 420 this.sqlstatements = rawStatements; 421 422 // Initialize statement parsing infrastructure 423 this.sqlcmds = SqlCmdsFactory.get(vendor); 424 425 // Inject sqlcmds into parsers (required for make_stmt and other methods) 426 this.fparser.sqlcmds = this.sqlcmds; 427 this.fplsqlparser.sqlcmds = this.sqlcmds; 428 429 // Initialize global context for semantic analysis 430 // CRITICAL: When delegated from TGSqlParser, use TGSqlParser's frameStack 431 // so that variables set in statements can be found by other statements 432 if (context != null && context.getGsqlparser() != null) { 433 TGSqlParser gsqlparser = (TGSqlParser) context.getGsqlparser(); 434 this.frameStack = gsqlparser.getFrameStack(); 435 436 // CRITICAL: Set gsqlparser on the NodeFactory - matches TGSqlParser behavior 437 // This is needed for proper AST node creation during parsing 438 // Without this, expression traversal order may differ, causing 439 // dataflow constant ordering issues 440 this.fparser.getNf().setGsqlParser(gsqlparser); 441 
this.fplsqlparser.getNf().setGsqlParser(gsqlparser); 442 443 // Create global context if needed 444 this.globalContext = new TContext(); 445 this.sqlEnv = new TSQLEnv(this.vendor) { 446 @Override 447 public void initSQLEnv() { 448 } 449 }; 450 this.globalContext.setSqlEnv(this.sqlEnv, this.sqlstatements); 451 } else { 452 initializeGlobalContext(); 453 } 454 455 // Parse each statement with exception handling for robustness 456 for (int i = 0; i < sqlstatements.size(); i++) { 457 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 458 459 try { 460 stmt.setFrameStack(frameStack); 461 462 // Parse the statement 463 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 464 465 // Oracle-specific post-processing (overridden hook method) 466 afterStatementParsed(stmt); 467 468 // Handle error recovery for CREATE TABLE/INDEX 469 boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE; 470 if (doRecover && ((parseResult != 0) || (stmt.getErrorCount() > 0))) { 471 handleCreateTableErrorRecovery(stmt); 472 } 473 474 // Collect syntax errors 475 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 476 copyErrorsFromStatement(stmt); 477 } 478 479 } catch (Exception ex) { 480 // Use inherited exception handler from AbstractSqlParser 481 // This provides consistent error handling across all database parsers 482 handleStatementParsingException(stmt, i, ex); 483 continue; 484 } 485 } 486 487 // Clean up frame stack 488 if (globalFrame != null) { 489 globalFrame.popMeFromStack(frameStack); 490 } 491 492 return this.sqlstatements; 493 } 494 495 // Note: initializeGlobalContext() inherited from AbstractSqlParser 496 497 /** 498 * Override to provide Oracle-specific post-processing after statement parsing. 499 * <p> 500 * For Oracle, we check if the statement is PL/SQL and recursively find syntax 501 * errors in nested PL/SQL statements. 
502 */ 503 @Override 504 protected void afterStatementParsed(TCustomSqlStatement stmt) { 505 if (stmt.isoracleplsql()) { 506 findAllSyntaxErrorsInPlsql(stmt); 507 } 508 } 509 510 /** 511 * Perform Oracle-specific semantic analysis using TSQLResolver. 512 * 513 * <p>This includes: 514 * <ul> 515 * <li>Column-to-table resolution</li> 516 * <li>Dataflow analysis</li> 517 * <li>Reference resolution</li> 518 * <li>Scope resolution</li> 519 * </ul> 520 * 521 * @param context the parser context 522 * @param statements the parsed statements 523 */ 524 @Override 525 protected void performSemanticAnalysis(ParserContext context, TStatementList statements) { 526 if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) { 527 TSQLResolver resolver = new TSQLResolver(globalContext, statements); 528 resolver.resolve(); 529 } 530 } 531 532 /** 533 * Perform Oracle-specific AST interpretation/evaluation using TASTEvaluator. 534 * 535 * <p>This executes simple SQL statements and evaluates expressions 536 * for static analysis and constant folding. 537 * 538 * @param context the parser context 539 * @param statements the parsed statements 540 */ 541 @Override 542 protected void performInterpreter(ParserContext context, TStatementList statements) { 543 if (TBaseType.ENABLE_INTERPRETER && getSyntaxErrors().isEmpty()) { 544 TLog.clearLogs(); 545 TGlobalScope interpreterScope = new TGlobalScope(sqlEnv); 546 TLog.enableInterpreterLogOnly(); 547 TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope); 548 astEvaluator.eval(); 549 } 550 } 551 552 // ========== Raw Statement Extraction ========== 553 // These methods extract raw SQL statements from tokens without full parsing 554 // Extracted from TGSqlParser.dooraclegetrawsqlstatements() and related methods 555 556 /** 557 * Extract raw Oracle SQL statements from tokenized source. 
     * <p>
     * This is the main Oracle statement extraction state machine that:
     * <ul>
     * <li>Groups tokens into statement boundaries</li>
     * <li>Identifies statement types (SQL vs PL/SQL, SQL*Plus commands)</li>
     * <li>Handles nested PL/SQL blocks (procedures, functions, packages, triggers)</li>
     * <li>Tracks BEGIN/END pairs and other block delimiters</li>
     * <li>Detects statement terminators (semicolon, forward slash, period)</li>
     * </ul>
     *
     * <p><b>State Machine:</b> Uses 4 main states plus an error-recovery state:
     * <ul>
     * <li>{@code stnormal} - Between statements, looking for start of next statement</li>
     * <li>{@code stsql} - Inside a SQL statement</li>
     * <li>{@code stsqlplus} - Inside a SQL*Plus command</li>
     * <li>{@code ststoredprocedure} - Inside a PL/SQL block (procedure/function/package/trigger)</li>
     * <li>{@code sterror} - Error recovery mode</li>
     * </ul>
     *
     * <p><b>Extracted from:</b> TGSqlParser.dooraclegetrawsqlstatements() (lines 10071-10859)
     *
     * <p><b>Design Note:</b> This method now receives a builder to populate with results,
     * following Option A design where the vendor-specific method focuses on parsing logic
     * while extractRawStatements() handles result construction.
582 * 583 * @param builder the result builder to populate with statements and error information 584 */ 585 private void dooraclegetrawsqlstatements(SqlParseResult.Builder builder) { 586 int waitingEnds[] = new int[stored_procedure_nested_level]; 587 stored_procedure_type sptype[] = new stored_procedure_type[stored_procedure_nested_level]; 588 stored_procedure_status procedure_status[] = new stored_procedure_status[stored_procedure_nested_level]; 589 boolean endBySlashOnly = true; 590 int nestedProcedures = 0, nestedParenthesis = 0; 591 // Flag for CREATE MLE MODULE with AS clause - terminates with / not ; 592 boolean mleModuleWithAs = false; 593 // Flag for WITH FUNCTION/PROCEDURE - track BEGIN/END nesting to handle embedded semicolons 594 boolean withPlsqlDefinition = false; 595 int withPlsqlBeginEndNesting = 0; 596 boolean withPlsqlFoundSelect = false; // True when SELECT has been found after WITH FUNCTION 597 // Track whether the current CTE statement's main SELECT has been found 598 // (i.e., the SELECT after WITH name AS (...) 
at paren level 0) 599 boolean cteMainSelectFound = false; 600 601 if (TBaseType.assigned(sqlstatements)) sqlstatements.clear(); 602 if (!TBaseType.assigned(sourcetokenlist)) { 603 // No tokens available - populate builder with error and return 604 builder.errorCode(1); 605 builder.errorMessage("No source token list available"); 606 builder.sqlStatements(new TStatementList()); 607 return; 608 } 609 610 gcurrentsqlstatement = null; 611 EFindSqlStateType gst = EFindSqlStateType.stnormal; 612 TSourceToken lcprevsolidtoken = null, ast = null; 613 614 // Main tokenization loop 615 for (int i = 0; i < sourcetokenlist.size(); i++) { 616 617 if ((ast != null) && (ast.issolidtoken())) 618 lcprevsolidtoken = ast; 619 620 ast = sourcetokenlist.get(i); 621 sourcetokenlist.curpos = i; 622 623 // Token-specific keyword transformations for Oracle 624 performRawStatementTokenTransformations(ast); 625 626 // State machine processing 627 switch (gst) { 628 case sterror: { 629 if (ast.tokentype == ETokenType.ttsemicolon) { 630 appendToken(gcurrentsqlstatement, ast); 631 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 632 gst = EFindSqlStateType.stnormal; 633 } else { 634 appendToken(gcurrentsqlstatement, ast); 635 } 636 break; 637 } //sterror 638 639 case stnormal: { 640 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 641 || (ast.tokencode == TBaseType.cmtslashstar) 642 || (ast.tokencode == TBaseType.lexspace) 643 || (ast.tokencode == TBaseType.lexnewline) 644 || (ast.tokentype == ETokenType.ttsemicolon)) { 645 if (gcurrentsqlstatement != null) { 646 appendToken(gcurrentsqlstatement, ast); 647 } 648 649 if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) { 650 if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) { 651 // ;;;; continuous semicolon, treat it as comment 652 ast.tokentype = ETokenType.ttsimplecomment; 653 ast.tokencode = TBaseType.cmtdoublehyphen; 654 } 
655 } 656 657 continue; 658 } 659 660 if (ast.tokencode == TBaseType.sqlpluscmd) { 661 gst = EFindSqlStateType.stsqlplus; 662 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 663 appendToken(gcurrentsqlstatement, ast); 664 continue; 665 } 666 667 // find a token to start sql or plsql mode 668 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 669 670 if (gcurrentsqlstatement != null) { 671 if (gcurrentsqlstatement.isoracleplsql()) { 672 nestedProcedures = 0; 673 gst = EFindSqlStateType.ststoredprocedure; 674 appendToken(gcurrentsqlstatement, ast); 675 676 switch (gcurrentsqlstatement.sqlstatementtype) { 677 case sstplsql_createprocedure: 678 sptype[nestedProcedures] = stored_procedure_type.procedure; 679 break; 680 case sstplsql_createfunction: 681 sptype[nestedProcedures] = stored_procedure_type.function; 682 break; 683 case sstplsql_createpackage: 684 sptype[nestedProcedures] = stored_procedure_type.package_spec; 685 if (ast.searchToken(TBaseType.rrw_body, 5) != null) { 686 sptype[nestedProcedures] = stored_procedure_type.package_body; 687 } 688 break; 689 case sst_plsql_block: 690 sptype[nestedProcedures] = stored_procedure_type.block_with_declare; 691 if (ast.tokencode == TBaseType.rrw_begin) { 692 sptype[nestedProcedures] = stored_procedure_type.block_with_begin; 693 } 694 break; 695 case sstplsql_createtrigger: 696 sptype[nestedProcedures] = stored_procedure_type.create_trigger; 697 break; 698 case sstoraclecreatelibrary: 699 sptype[nestedProcedures] = stored_procedure_type.create_library; 700 break; 701 case sstplsql_createtype_placeholder: 702 gst = EFindSqlStateType.stsql; 703 break; 704 default: 705 sptype[nestedProcedures] = stored_procedure_type.others; 706 break; 707 } 708 709 if (sptype[0] == stored_procedure_type.block_with_declare) { 710 endBySlashOnly = false; 711 procedure_status[0] = stored_procedure_status.is_as; 712 } else if (sptype[0] == stored_procedure_type.block_with_begin) { 713 endBySlashOnly = false; 714 
procedure_status[0] = stored_procedure_status.body; 715 } else if (sptype[0] == stored_procedure_type.procedure) { 716 endBySlashOnly = false; 717 procedure_status[0] = stored_procedure_status.start; 718 } else if (sptype[0] == stored_procedure_type.function) { 719 endBySlashOnly = false; 720 procedure_status[0] = stored_procedure_status.start; 721 } else if (sptype[0] == stored_procedure_type.package_spec) { 722 endBySlashOnly = false; 723 procedure_status[0] = stored_procedure_status.start; 724 } else if (sptype[0] == stored_procedure_type.package_body) { 725 endBySlashOnly = false; 726 procedure_status[0] = stored_procedure_status.start; 727 } else if (sptype[0] == stored_procedure_type.create_trigger) { 728 endBySlashOnly = false; 729 procedure_status[0] = stored_procedure_status.start; 730 } else if (sptype[0] == stored_procedure_type.create_library) { 731 endBySlashOnly = false; 732 procedure_status[0] = stored_procedure_status.bodyend; 733 } else { 734 endBySlashOnly = true; 735 procedure_status[0] = stored_procedure_status.bodyend; 736 } 737 738 if ((ast.tokencode == TBaseType.rrw_begin) 739 || (ast.tokencode == TBaseType.rrw_package) 740 || (ast.searchToken(TBaseType.rrw_package, 4) != null)) { 741 waitingEnds[nestedProcedures] = 1; 742 } 743 } else { 744 gst = EFindSqlStateType.stsql; 745 appendToken(gcurrentsqlstatement, ast); 746 nestedParenthesis = 0; 747 // Check if this is CREATE MLE MODULE with AS clause (JavaScript code) 748 // If AS is found after LANGUAGE JAVASCRIPT, it terminates with / not ; 749 if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstoraclecreatemlemodule) { 750 // Look ahead to see if there's an AS keyword 751 TSourceToken asToken = ast.searchToken(TBaseType.rrw_as, 10); 752 mleModuleWithAs = (asToken != null); 753 } else { 754 mleModuleWithAs = false; 755 } 756 757 // Check if this is WITH FUNCTION/PROCEDURE (Oracle 12c inline PL/SQL) 758 // Need to track BEGIN/END nesting to handle embedded semicolons 759 if 
(ast.tokencode == TBaseType.rrw_with && gcurrentsqlstatement.isctequery) { 760 // Look ahead for FUNCTION or PROCEDURE keyword 761 TSourceToken nextSolid = ast.nextSolidToken(); 762 if (nextSolid != null && (nextSolid.tokencode == TBaseType.rrw_function 763 || nextSolid.tokencode == TBaseType.rrw_procedure)) { 764 withPlsqlDefinition = true; 765 withPlsqlBeginEndNesting = 0; 766 } 767 } 768 } 769 } else { 770 //error token found 771 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo) 772 , "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 773 774 ast.tokentype = ETokenType.tttokenlizererrortoken; 775 gst = EFindSqlStateType.sterror; 776 777 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 778 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 779 appendToken(gcurrentsqlstatement, ast); 780 } 781 782 break; 783 } // stnormal 784 785 case stsqlplus: { 786 if (ast.insqlpluscmd) { 787 appendToken(gcurrentsqlstatement, ast); 788 } else { 789 gst = EFindSqlStateType.stnormal; //this token must be newline, 790 appendToken(gcurrentsqlstatement, ast); // so add it here 791 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 792 } 793 794 break; 795 }//case stsqlplus 796 797 case stsql: { 798 // For WITH FUNCTION/PROCEDURE, track BEGIN/END nesting and when SELECT is found 799 if (withPlsqlDefinition) { 800 if (ast.tokencode == TBaseType.rrw_begin) { 801 withPlsqlBeginEndNesting++; 802 } else if (ast.tokencode == TBaseType.rrw_end) { 803 withPlsqlBeginEndNesting--; 804 if (withPlsqlBeginEndNesting < 0) withPlsqlBeginEndNesting = 0; 805 } else if (ast.tokencode == TBaseType.rrw_select && withPlsqlBeginEndNesting == 0) { 806 // Found SELECT after all function definitions are done 807 withPlsqlFoundSelect = true; 808 } 809 } 810 811 // For CREATE MLE MODULE 
with AS clause, don't terminate on semicolon 812 // The JavaScript code may contain semicolons; wait for / to terminate 813 // For WITH FUNCTION/PROCEDURE, don't terminate on semicolon until SELECT is found 814 // (the semicolons in function body and after END are part of the function definition) 815 boolean skipSemicolonTermination = mleModuleWithAs || (withPlsqlDefinition && !withPlsqlFoundSelect); 816 if (ast.tokentype == ETokenType.ttsemicolon && !skipSemicolonTermination) { 817 gst = EFindSqlStateType.stnormal; 818 appendToken(gcurrentsqlstatement, ast); 819 gcurrentsqlstatement.semicolonended = ast; 820 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 821 mleModuleWithAs = false; // Reset flag 822 withPlsqlDefinition = false; // Reset WITH FUNCTION flag 823 withPlsqlBeginEndNesting = 0; 824 cteMainSelectFound = false; 825 withPlsqlFoundSelect = false; 826 continue; 827 } 828 829 if (sourcetokenlist.sqlplusaftercurtoken()) //most probably is / cmd 830 { 831 gst = EFindSqlStateType.stnormal; 832 appendToken(gcurrentsqlstatement, ast); 833 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 834 mleModuleWithAs = false; // Reset flag 835 continue; 836 } 837 838 if (ast.tokencode == '(') nestedParenthesis++; 839 if (ast.tokencode == ')') { 840 nestedParenthesis--; 841 if (nestedParenthesis < 0) nestedParenthesis = 0; 842 } 843 844 Boolean findNewStmt = false; 845 TCustomSqlStatement lcStmt = null; 846 // Check for new statement: CREATE TABLE (original), or SELECT inside a non-CTE SELECT 847 boolean shouldCheckNewStmt = false; 848 if ((nestedParenthesis == 0) && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstcreatetable)) { 849 shouldCheckNewStmt = true; 850 } else if ((nestedParenthesis == 0) && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstselect) 851 && 
(ast.tokencode == TBaseType.rrw_select || ast.tokencode == TBaseType.rrw_with)) { 852 // Check if current statement is a CTE (starts with WITH) 853 boolean isCteContext = false; 854 for (int si = 0; si < gcurrentsqlstatement.sourcetokenlist.size(); si++) { 855 TSourceToken st = gcurrentsqlstatement.sourcetokenlist.get(si); 856 if (st.tokentype == ETokenType.ttwhitespace || st.tokentype == ETokenType.ttreturn 857 || st.tokencode == TBaseType.cmtdoublehyphen || st.tokencode == TBaseType.cmtslashstar) { 858 continue; 859 } 860 if (st.tokencode == TBaseType.rrw_with) { 861 isCteContext = true; 862 } 863 break; 864 } 865 // Don't split if previous token makes this SELECT part of current statement: 866 // - Set operators: UNION, INTERSECT, MINUS, EXCEPT, ALL 867 // - Left paren: (SELECT ...) — SELECT is main query of parenthesized expr 868 boolean suppressSplit = false; 869 if (ast.tokencode == TBaseType.rrw_select && lcprevsolidtoken != null) { 870 int prevCode = lcprevsolidtoken.tokencode; 871 if (prevCode == TBaseType.rrw_union || prevCode == TBaseType.rrw_intersect 872 || prevCode == TBaseType.rrw_minus || prevCode == TBaseType.rrw_except 873 || prevCode == TBaseType.rrw_all 874 || prevCode == '(') { 875 suppressSplit = true; 876 } 877 } 878 if (suppressSplit) { 879 // SELECT is part of current statement — don't split 880 } else if (!isCteContext) { 881 // Non-CTE SELECT: any SELECT/WITH at paren level 0 starts a new statement 882 shouldCheckNewStmt = true; 883 } else if (cteMainSelectFound) { 884 // CTE context: main SELECT already consumed, so this SELECT/WITH 885 // at paren level 0 is a new statement 886 shouldCheckNewStmt = true; 887 } else if (ast.tokencode == TBaseType.rrw_select) { 888 // CTE context: this is the main SELECT after WITH name AS (...) 
889 cteMainSelectFound = true; 890 // Don't split — this SELECT is part of the CTE statement 891 } 892 // If ast is WITH and main SELECT not yet found, it could be another 893 // CTE definition (WITH a AS (...), b AS (...)) — don't split 894 } 895 if (shouldCheckNewStmt) { 896 // For SELECT-after-SELECT/WITH splitting, use stnormal so issql can detect CTE starts. 897 // For CREATE TABLE, preserve original stsql state to avoid false positives 898 // (e.g., INSERT/DELETE keywords in blockchain table clauses). 899 EFindSqlStateType issqlState = (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstselect) 900 ? EFindSqlStateType.stnormal : gst; 901 lcStmt = sqlcmds.issql(ast, issqlState, gcurrentsqlstatement); 902 if (lcStmt != null) { 903 findNewStmt = true; 904 if (lcStmt.sqlstatementtype == ESqlStatementType.sstselect) { 905 TSourceToken prevst = ast.prevSolidToken(); 906 if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstcreatetable) { 907 // For CREATE TABLE, suppress split when SELECT follows AS/(/): AS (SELECT ...) 
908 if ((prevst.tokencode == TBaseType.rrw_as) || (prevst.tokencode == '(') || (prevst.tokencode == ')')) { 909 findNewStmt = false; 910 } 911 } 912 // For SELECT-after-SELECT/WITH splitting at paren level 0, 913 // no suppression needed — the new SELECT/WITH is a new statement 914 } 915 } 916 } 917 918 if (findNewStmt) { 919 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 920 gcurrentsqlstatement = lcStmt; 921 cteMainSelectFound = false; // Reset for new statement 922 nestedParenthesis = 0; // Reset paren tracking for new statement 923 appendToken(gcurrentsqlstatement, ast); 924 continue; 925 } else 926 appendToken(gcurrentsqlstatement, ast); 927 928 break; 929 }//case stsql 930 931 case ststoredprocedure: { 932 933 if (procedure_status[nestedProcedures] != stored_procedure_status.bodyend) { 934 appendToken(gcurrentsqlstatement, ast); 935 } 936 937 switch (procedure_status[nestedProcedures]) { 938 case cursor_declare: 939 if (ast.tokencode == ';') { 940 nestedProcedures--; 941 if (nestedProcedures < 0) { 942 nestedProcedures = 0; 943 } 944 } 945 break; 946 case start: 947 if ((ast.tokencode == TBaseType.rrw_as) || (ast.tokencode == TBaseType.rrw_is)) { 948 if (sptype[nestedProcedures] != stored_procedure_type.create_trigger) { 949 if ((sptype[0] == stored_procedure_type.package_spec) && (nestedProcedures > 0)) { 950 // when it's a package specification, only top level accept as/is 951 } else { 952 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 953 if (ast.searchToken("language", 1) != null) { 954 if (nestedProcedures == 0) { 955 gst = EFindSqlStateType.stsql; 956 } else { 957 procedure_status[nestedProcedures] = stored_procedure_status.body; 958 nestedProcedures--; 959 } 960 } 961 } 962 } 963 } else if (ast.tokencode == TBaseType.rrw_begin) { 964 if (sptype[nestedProcedures] == stored_procedure_type.create_trigger) { 965 waitingEnds[nestedProcedures]++; 966 
} 967 if (nestedProcedures > 0) { 968 nestedProcedures--; 969 } 970 procedure_status[nestedProcedures] = stored_procedure_status.body; 971 } else if (ast.tokencode == TBaseType.rrw_end) { 972 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures - 1] == 1) 973 && ((sptype[nestedProcedures - 1] == stored_procedure_type.package_body) 974 || (sptype[nestedProcedures - 1] == stored_procedure_type.package_spec))) { 975 nestedProcedures--; 976 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 977 } 978 } else if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) { 979 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures] == 0) 980 && (procedure_status[nestedProcedures - 1] == stored_procedure_status.is_as)) { 981 nestedProcedures--; 982 nestedProcedures++; 983 waitingEnds[nestedProcedures] = 0; 984 procedure_status[nestedProcedures] = stored_procedure_status.start; 985 } 986 } else if (ast.tokencode == TBaseType.rrw_oracle_cursor) { 987 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures] == 0) 988 && (procedure_status[nestedProcedures - 1] == stored_procedure_status.is_as)) { 989 nestedProcedures--; 990 nestedProcedures++; 991 waitingEnds[nestedProcedures] = 0; 992 procedure_status[nestedProcedures] = stored_procedure_status.cursor_declare; 993 } 994 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) && (ast.tokencode == TBaseType.rrw_declare)) { 995 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 996 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) 997 && (ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 998 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 999 gst = EFindSqlStateType.stnormal; 1000 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1001 1002 gcurrentsqlstatement = new 
TSqlplusCmdStatement(vendor); 1003 appendToken(gcurrentsqlstatement, ast); 1004 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1005 } else if (sptype[nestedProcedures] == stored_procedure_type.create_trigger) { 1006 if (ast.tokencode == TBaseType.rrw_trigger) { 1007 TSourceToken compoundSt = ast.searchToken(TBaseType.rrw_oracle_compound, -1); 1008 if (compoundSt != null) { 1009 procedure_status[nestedProcedures] = stored_procedure_status.body; 1010 waitingEnds[nestedProcedures]++; 1011 } 1012 } 1013 } else if ((sptype[nestedProcedures] == stored_procedure_type.function) 1014 && (ast.tokencode == TBaseType.rrw_teradata_using)) { 1015 if ((ast.searchToken("aggregate", -1) != null) || (ast.searchToken("pipelined", -1) != null)) { 1016 if (nestedProcedures == 0) { 1017 gst = EFindSqlStateType.stsql; 1018 } else { 1019 procedure_status[nestedProcedures] = stored_procedure_status.body; 1020 nestedProcedures--; 1021 } 1022 } 1023 } 1024 break; 1025 case is_as: 1026 if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) { 1027 nestedProcedures++; 1028 if (nestedProcedures > stored_procedure_nested_level - 1) { 1029 gst = EFindSqlStateType.sterror; 1030 nestedProcedures--; 1031 } else { 1032 waitingEnds[nestedProcedures] = 0; 1033 procedure_status[nestedProcedures] = stored_procedure_status.start; 1034 } 1035 } else if (ast.tokencode == TBaseType.rrw_begin) { 1036 if ((nestedProcedures == 0) && 1037 ((sptype[nestedProcedures] == stored_procedure_type.package_body) 1038 || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) { 1039 // top level package begin already counted 1040 } else { 1041 waitingEnds[nestedProcedures]++; 1042 } 1043 procedure_status[nestedProcedures] = stored_procedure_status.body; 1044 } else if (ast.tokencode == TBaseType.rrw_end) { 1045 if ((nestedProcedures == 0) && (waitingEnds[nestedProcedures] == 1) 1046 && 
((sptype[nestedProcedures] == stored_procedure_type.package_body) 1047 || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) { 1048 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 1049 waitingEnds[nestedProcedures]--; 1050 } else { 1051 waitingEnds[nestedProcedures]--; 1052 } 1053 } else if (ast.tokencode == TBaseType.rrw_case) { 1054 if (ast.searchToken(';', 1) == null) { 1055 waitingEnds[nestedProcedures]++; 1056 } 1057 } 1058 break; 1059 case body: 1060 if (ast.tokencode == TBaseType.rrw_begin) { 1061 waitingEnds[nestedProcedures]++; 1062 } else if (ast.tokencode == TBaseType.rrw_if) { 1063 if (ast.searchToken(';', 2) == null) { 1064 waitingEnds[nestedProcedures]++; 1065 } 1066 } else if (ast.tokencode == TBaseType.rrw_case) { 1067 if (ast.searchToken(';', 2) == null) { 1068 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 1069 waitingEnds[nestedProcedures]++; 1070 } 1071 } 1072 } else if (ast.tokencode == TBaseType.rrw_loop) { 1073 if (!((ast.searchToken(TBaseType.rrw_end, -1) != null) 1074 && (ast.searchToken(';', 2) != null))) { 1075 waitingEnds[nestedProcedures]++; 1076 } 1077 } else if (ast.tokencode == TBaseType.rrw_end) { 1078 waitingEnds[nestedProcedures]--; 1079 if (waitingEnds[nestedProcedures] == 0) { 1080 if (nestedProcedures == 0) { 1081 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 1082 } else { 1083 nestedProcedures--; 1084 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 1085 } 1086 } 1087 } else if ((waitingEnds[nestedProcedures] == 0) 1088 && (ast.tokentype == ETokenType.ttslash) 1089 && (ast.tokencode == TBaseType.sqlpluscmd)) { 1090 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 1091 gst = EFindSqlStateType.stnormal; 1092 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1093 1094 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 1095 appendToken(gcurrentsqlstatement, 
ast); 1096 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1097 } 1098 break; 1099 case bodyend: 1100 if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 1101 // TPlsqlStatementParse(asqlstatement).TerminatorToken := ast; 1102 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 1103 gst = EFindSqlStateType.stnormal; 1104 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1105 1106 //make / a sqlplus cmd 1107 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 1108 appendToken(gcurrentsqlstatement, ast); 1109 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1110 } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) { 1111 // single dot at a seperate line 1112 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 1113 gst = EFindSqlStateType.stnormal; 1114 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1115 1116 //make ttperiod a sqlplus cmd 1117 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 1118 appendToken(gcurrentsqlstatement, ast); 1119 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1120 } else if ((ast.searchToken(TBaseType.rrw_package, 1) != null) && (!endBySlashOnly)) { 1121 appendToken(gcurrentsqlstatement, ast); 1122 gst = EFindSqlStateType.stnormal; 1123 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1124 } else if ((ast.searchToken(TBaseType.rrw_procedure, 1) != null) && (!endBySlashOnly)) { 1125 
appendToken(gcurrentsqlstatement, ast); 1126 gst = EFindSqlStateType.stnormal; 1127 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1128 } else if ((ast.searchToken(TBaseType.rrw_function, 1) != null) && (!endBySlashOnly)) { 1129 appendToken(gcurrentsqlstatement, ast); 1130 gst = EFindSqlStateType.stnormal; 1131 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1132 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) 1133 && ((ast.searchToken(TBaseType.rrw_package, 4) != null) || (ast.searchToken(TBaseType.rrw_package, 5) != null)) 1134 && (!endBySlashOnly)) { 1135 appendToken(gcurrentsqlstatement, ast); 1136 gst = EFindSqlStateType.stnormal; 1137 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1138 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) 1139 && ((ast.searchToken(TBaseType.rrw_procedure, 4) != null) 1140 || (ast.searchToken(TBaseType.rrw_function, 4) != null) 1141 || (ast.searchToken(TBaseType.rrw_view, 4) != null) 1142 || (ast.searchToken(TBaseType.rrw_oracle_synonym, 4) != null) 1143 || (ast.searchToken(TBaseType.rrw_trigger, 4) != null)) 1144 && (!endBySlashOnly)) { 1145 appendToken(gcurrentsqlstatement, ast); 1146 gst = EFindSqlStateType.stnormal; 1147 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1148 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) && (ast.searchToken(TBaseType.rrw_library, 4) != null) && (!endBySlashOnly)) { 1149 appendToken(gcurrentsqlstatement, ast); 1150 gst = EFindSqlStateType.stnormal; 1151 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1152 } else if 
((ast.searchToken(TBaseType.rrw_alter, 1) != null) && (ast.searchToken(TBaseType.rrw_trigger, 2) != null) && (!endBySlashOnly)) { 1153 appendToken(gcurrentsqlstatement, ast); 1154 gst = EFindSqlStateType.stnormal; 1155 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1156 } else if ((ast.searchToken(TBaseType.rrw_select, 1) != null) && (!endBySlashOnly)) { 1157 appendToken(gcurrentsqlstatement, ast); 1158 gst = EFindSqlStateType.stnormal; 1159 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1160 } else if ((ast.searchToken(TBaseType.rrw_call, 1) != null) && (!endBySlashOnly)) { 1161 appendToken(gcurrentsqlstatement, ast); 1162 gst = EFindSqlStateType.stnormal; 1163 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1164 } else if ((ast.searchToken(TBaseType.rrw_commit, 1) != null) && (!endBySlashOnly)) { 1165 appendToken(gcurrentsqlstatement, ast); 1166 gst = EFindSqlStateType.stnormal; 1167 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1168 } else if ((ast.searchToken(TBaseType.rrw_declare, 1) != null) && (!endBySlashOnly)) { 1169 appendToken(gcurrentsqlstatement, ast); 1170 gst = EFindSqlStateType.stnormal; 1171 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1172 } else if ((ast.searchToken(TBaseType.rrw_grant, 1) != null) 1173 && (ast.searchToken(TBaseType.rrw_execute, 2) != null) && (!endBySlashOnly)) { 1174 appendToken(gcurrentsqlstatement, ast); 1175 gst = EFindSqlStateType.stnormal; 1176 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1177 } else 
if ((ast.searchToken(TBaseType.rrw_alter, 1) != null) 1178 && (ast.searchToken(TBaseType.rrw_table, 2) != null) && (!endBySlashOnly)) { 1179 appendToken(gcurrentsqlstatement, ast); 1180 gst = EFindSqlStateType.stnormal; 1181 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1182 } else { 1183 appendToken(gcurrentsqlstatement, ast); 1184 } 1185 break; 1186 case end: 1187 break; 1188 default: 1189 break; 1190 } 1191 1192 if (ast.tokencode == TBaseType.sqlpluscmd) { 1193 int m = flexer.getkeywordvalue(ast.getAstext()); 1194 if (m != 0) { 1195 ast.tokencode = m; 1196 } else if (ast.tokentype == ETokenType.ttslash) { 1197 ast.tokencode = '/'; 1198 } else { 1199 ast.tokencode = TBaseType.ident; 1200 } 1201 } 1202 1203 final int wrapped_keyword_max_pos = 20; 1204 if ((ast.tokencode == TBaseType.rrw_wrapped) 1205 && (ast.posinlist - gcurrentsqlstatement.sourcetokenlist.get(0).posinlist < wrapped_keyword_max_pos)) { 1206 if (gcurrentsqlstatement instanceof gudusoft.gsqlparser.stmt.TCommonStoredProcedureSqlStatement) { 1207 ((gudusoft.gsqlparser.stmt.TCommonStoredProcedureSqlStatement) gcurrentsqlstatement).setWrapped(true); 1208 } 1209 1210 if (gcurrentsqlstatement instanceof gudusoft.gsqlparser.stmt.oracle.TPlsqlCreatePackage) { 1211 if (ast.prevSolidToken() != null) { 1212 ((gudusoft.gsqlparser.stmt.oracle.TPlsqlCreatePackage) gcurrentsqlstatement) 1213 .setPackageName(fparser.getNf().createObjectNameWithPart(ast.prevSolidToken())); 1214 } 1215 } 1216 } 1217 1218 break; 1219 } //ststoredprocedure 1220 1221 } //switch 1222 }//for 1223 1224 //last statement 1225 if ((gcurrentsqlstatement != null) && 1226 ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) || 1227 (gst == EFindSqlStateType.sterror))) { 1228 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, 
true, builder); 1229 } 1230 1231 // Populate builder with results 1232 builder.sqlStatements(this.sqlstatements); 1233 builder.syntaxErrors(syntaxErrors instanceof ArrayList ? 1234 (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors)); 1235 builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size()); 1236 builder.errorMessage(syntaxErrors.isEmpty() ? "" : 1237 String.format("Raw extraction completed with %d error(s)", syntaxErrors.size())); 1238 } 1239 1240 /** 1241 * Handle token transformations during raw statement extraction. 1242 * <p> 1243 * This performs Oracle-specific keyword disambiguation that must happen 1244 * before statement boundary detection. Examples: 1245 * <ul> 1246 * <li>RETURN after WHERE → treat as identifier</li> 1247 * <li>VALUE after BY → mark as value_after_by</li> 1248 * <li>NEW → treat as identifier or constructor based on context</li> 1249 * <li>And many more Oracle-specific cases</li> 1250 * </ul> 1251 * 1252 * @param ast current token being processed 1253 */ 1254 private void performRawStatementTokenTransformations(TSourceToken ast) { 1255 // This method contains the keyword transformation logic from dooraclegetrawsqlstatements 1256 // It's been extracted to keep the main method more readable 1257 1258 if (ast.tokencode == TBaseType.rrw_return) { 1259 TSourceToken stMatch = ast.searchToken(TBaseType.rrw_where, 1); 1260 if (stMatch != null) { 1261 ast.tokencode = TBaseType.ident; 1262 } 1263 } else if (ast.tokencode == TBaseType.rrw_value_oracle) { 1264 TSourceToken stBy = ast.searchToken(TBaseType.rrw_by, -1); 1265 if (stBy != null) { 1266 ast.tokencode = TBaseType.rrw_value_after_by; 1267 } 1268 } else if (ast.tokencode == TBaseType.rrw_new_oracle) { 1269 TSourceToken stRightParen = ast.searchToken(')', -1); 1270 if (stRightParen != null) { 1271 ast.tokencode = TBaseType.ident; 1272 } 1273 TSourceToken stDot = ast.searchToken('.', 1); 1274 if (stDot != null) { 1275 ast.tokencode = TBaseType.ident; 1276 } 
1277 1278 TSourceToken stNext = ast.searchTokenAfterObjectName(); 1279 stDot = ast.searchToken('.', 1); 1280 if ((stDot == null) && (stNext != null) && (stNext.tokencode == '(')) { 1281 ast.tokencode = TBaseType.rrw_oracle_new_constructor; 1282 } 1283 } else if (ast.tokencode == TBaseType.rrw_chr_oracle) { 1284 TSourceToken stLeftParen = ast.searchToken('(', 1); 1285 if (stLeftParen == null) { 1286 ast.tokencode = TBaseType.ident; 1287 } 1288 } else if (ast.tokencode == TBaseType.rrw_log_oracle) { 1289 TSourceToken stNext = ast.searchToken(TBaseType.rrw_errors_oracle, 1); 1290 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_view, -1); 1291 if (stPrev == null) { 1292 stPrev = ast.searchToken(TBaseType.rrw_oracle_supplemental, -1); 1293 } 1294 if ((stNext == null) && (stPrev == null)) { 1295 ast.tokencode = TBaseType.ident; 1296 } 1297 } else if (ast.tokencode == TBaseType.rrw_delete) { 1298 TSourceToken stPrev = ast.searchToken('.', -1); 1299 if (stPrev != null) { 1300 ast.tokencode = TBaseType.ident; 1301 } 1302 } else if (ast.tokencode == TBaseType.rrw_partition) { 1303 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_add, -1); 1304 if (stPrev != null) { 1305 stPrev.tokencode = TBaseType.rrw_add_p; 1306 } 1307 } else if (ast.tokencode == TBaseType.rrw_oracle_column) { 1308 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_oracle_modify, -1); 1309 if (stPrev != null) { 1310 ast.tokencode = TBaseType.rrw_oracle_column_after_modify; 1311 } 1312 } else if (ast.tokencode == TBaseType.rrw_oracle_apply) { 1313 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_outer, -1); 1314 if (stPrev != null) { 1315 stPrev.tokencode = TBaseType.ORACLE_OUTER2; 1316 } 1317 } else if (ast.tokencode == TBaseType.rrw_oracle_subpartition) { 1318 TSourceToken stNext = ast.searchToken("(", 2); 1319 if (stNext != null) { 1320 TSourceToken st1 = ast.nextSolidToken(); 1321 if (st1.toString().equalsIgnoreCase("template")) { 1322 // don't change, keep as RW_SUBPARTITION 1323 } else { 
1324 ast.tokencode = TBaseType.rrw_oracle_subpartition_tablesample; 1325 } 1326 } 1327 } else if (ast.tokencode == TBaseType.rrw_primary) { 1328 TSourceToken stNext = ast.searchToken("key", 1); 1329 if (stNext == null) { 1330 ast.tokencode = TBaseType.ident; 1331 } 1332 } else if (ast.tokencode == TBaseType.rrw_oracle_offset) { 1333 TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_row, 2); 1334 if (stNext == null) { 1335 stNext = ast.searchToken(TBaseType.rrw_oracle_rows, 2); 1336 } 1337 if (stNext != null) { 1338 ast.tokencode = TBaseType.rrw_oracle_offset_row; 1339 } 1340 } else if (ast.tokencode == TBaseType.rrw_translate) { 1341 TSourceToken stNext = ast.searchToken("(", 2); 1342 if (stNext == null) { 1343 ast.tokencode = TBaseType.ident; 1344 } 1345 } else if (ast.tokencode == TBaseType.rrw_constraint) { 1346 TSourceToken stNext = ast.nextSolidToken(); 1347 if (stNext == null) { 1348 ast.tokencode = TBaseType.ident; 1349 } else { 1350 if (stNext.tokencode != TBaseType.ident) { 1351 ast.tokencode = TBaseType.ident; 1352 } 1353 } 1354 } else if (ast.tokencode == TBaseType.rrw_oracle_without) { 1355 TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_count, 1); 1356 if (stNext != null) { 1357 ast.tokencode = TBaseType.rrw_oracle_without_before_count; 1358 } 1359 } else if (ast.tokencode == TBaseType.rrw_bulk) { 1360 TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_collect, 1); 1361 if (stNext == null) { 1362 ast.tokencode = TBaseType.ident; 1363 } 1364 } else if (ast.tokencode == TBaseType.rrw_oracle_model) { 1365 TSourceToken stNext = ast.nextSolidToken(); 1366 if (stNext != null) { 1367 switch (stNext.toString().toUpperCase()) { 1368 case "RETURN": 1369 case "REFERENCE": 1370 case "IGNORE": 1371 case "KEEP": 1372 case "UNIQUE": 1373 case "PARTITION": 1374 case "DIMENSION": 1375 case "MEASURES": 1376 case "RULES": 1377 ast.tokencode = TBaseType.rrw_oracle_model_in_model_clause; 1378 break; 1379 default: 1380 ; 1381 } 1382 } 1383 } 1384 
} 1385 1386 private void appendToken(TCustomSqlStatement statement, TSourceToken token) { 1387 if (statement == null || token == null) { 1388 return; 1389 } 1390 token.stmt = statement; 1391 statement.sourcetokenlist.add(token); 1392 } 1393 1394 // ========== Error Handling and Recovery ========== 1395 1396 /** 1397 * Find all syntax errors in PL/SQL statements recursively. 1398 * Extracted from TGSqlParser.findAllSyntaxErrorsInPlsql(). 1399 */ 1400 private void findAllSyntaxErrorsInPlsql(TCustomSqlStatement psql) { 1401 if (psql.getErrorCount() > 0) { 1402 copyErrorsFromStatement(psql); 1403 } 1404 1405 for (int k = 0; k < psql.getStatements().size(); k++) { 1406 findAllSyntaxErrorsInPlsql(psql.getStatements().get(k)); 1407 } 1408 } 1409 1410 /** 1411 * Handle error recovery for CREATE TABLE/INDEX statements. 1412 * Oracle allows table properties that may not be fully parsed. 1413 * This method marks unparseable properties as SQL*Plus commands to skip them. 1414 * 1415 * <p>Extracted from TGSqlParser.doparse() lines 16916-16971 1416 */ 1417 private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) { 1418 if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable) || 1419 (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex)) && 1420 (!TBaseType.c_createTableStrictParsing)) { 1421 1422 // Find the closing parenthesis of table definition 1423 int nested = 0; 1424 boolean isIgnore = false, isFoundIgnoreToken = false; 1425 TSourceToken firstIgnoreToken = null; 1426 1427 for (int k = 0; k < stmt.sourcetokenlist.size(); k++) { 1428 TSourceToken st = stmt.sourcetokenlist.get(k); 1429 1430 if (isIgnore) { 1431 if (st.issolidtoken() && (st.tokencode != ';')) { 1432 isFoundIgnoreToken = true; 1433 if (firstIgnoreToken == null) { 1434 firstIgnoreToken = st; 1435 } 1436 } 1437 if (st.tokencode != ';') { 1438 st.tokencode = TBaseType.sqlpluscmd; 1439 } 1440 continue; 1441 } 1442 1443 if (st.tokencode == (int) ')') { 1444 nested--; 1445 if (nested == 0) { 
1446 // Check if next token is "AS ( SELECT" 1447 boolean isSelect = false; 1448 TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1); 1449 if (st1 != null) { 1450 TSourceToken st2 = st.searchToken((int) '(', 2); 1451 if (st2 != null) { 1452 TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3); 1453 isSelect = (st3 != null); 1454 } 1455 } 1456 if (!isSelect) isIgnore = true; 1457 } 1458 } 1459 1460 if ((st.tokencode == (int) '(') || (st.tokencode == TBaseType.left_parenthesis_2)) { 1461 nested++; 1462 } 1463 } 1464 1465 // Verify it's a valid Oracle table property 1466 if ((firstIgnoreToken != null) && 1467 (!TBaseType.searchOracleTablePros(firstIgnoreToken.toString()))) { 1468 // Not a valid property, keep the error 1469 isFoundIgnoreToken = false; 1470 } 1471 1472 // Retry parsing if we found ignoreable properties 1473 if (isFoundIgnoreToken) { 1474 stmt.clearError(); 1475 stmt.parsestatement(null, false); 1476 } 1477 } 1478 } 1479 1480 /** 1481 * Copy syntax errors from a statement to our error list. 1482 * Extracted from TGSqlParser.copyerrormsg(). 1483 */ 1484 1485 @Override 1486 public String toString() { 1487 return "OracleSqlParser{vendor=" + vendor + "}"; 1488 } 1489 1490 // ========== Main Oracle Tokenization ========== 1491 // Core tokenization logic extracted from TGSqlParser.dooraclesqltexttotokenlist() 1492 1493 /** 1494 * Perform Oracle-specific tokenization with SQL*Plus command detection. 
     * <p>
     * This method implements Oracle's complex tokenization rules including:
     * <ul>
     * <li>SQL*Plus command detection (SPOOL, SET, START, etc.)</li>
     * <li>Forward slash disambiguation (division vs PL/SQL delimiter)</li>
     * <li>Oracle-specific keyword transformations (INNER, TYPE, FULL, etc.)</li>
     * <li>Context-dependent token code modifications</li>
     * </ul>
     *
     * <p>Tokens are pulled one at a time from {@code getanewsourcetoken()} and
     * appended to {@code sourcetokenlist}; the loop ends when no token is returned
     * or a token code that is not positive is seen. If the very first call returns
     * {@code null} the method returns without touching the list.
     *
     * <p><b>State Machine:</b> Uses 5 boolean fields to track tokenization state:
     * <ul>
     * <li>{@code insqlpluscmd} - Currently inside SQL*Plus command</li>
     * <li>{@code isvalidplace} - Valid place to start SQL*Plus command</li>
     * <li>{@code waitingreturnforfloatdiv} - Slash seen, waiting for newline</li>
     * <li>{@code waitingreturnforsemicolon} - Semicolon seen, waiting for newline</li>
     * <li>{@code continuesqlplusatnewline} - SQL*Plus command continues to next line</li>
     * </ul>
     * The current {@code insqlpluscmd} value is also mirrored into
     * {@code flexer.insqlpluscmd} whenever it changes.
     *
     * <p><b>Extracted from:</b> TGSqlParser.dooraclesqltexttotokenlist() (lines 3931-4298)
     *
     * <p>This method contains no explicit {@code throw}; any exception would
     * originate from the lexer or token-list helpers it calls.
     */
    private void dooraclesqltexttotokenlist() {
        // Initialize state machine for SQL*Plus command detection
        insqlpluscmd = false;
        isvalidplace = true;
        waitingreturnforfloatdiv = false;
        waitingreturnforsemicolon = false;
        continuesqlplusatnewline = false;

        // Result of the most recent SQL*Plus command lookup; consulted after the
        // next token is fetched (see the '.' special case at the bottom of the loop).
        ESqlPlusCmd currentCmdType = ESqlPlusCmd.spcUnknown;

        // lct: the last '/' token that may still turn out to be a SQL*Plus delimiter.
        // prevst: a pending INNER or NOT token whose meaning depends on the next solid token.
        TSourceToken lct = null, prevst = null;

        TSourceToken asourcetoken, lcprevst;
        int yychar;

        asourcetoken = getanewsourcetoken();
        if (asourcetoken == null) return;
        yychar = asourcetoken.tokencode;

        while (yychar > 0) {
            sourcetokenlist.add(asourcetoken);

            switch (yychar) {
                // Comments and spaces do not change state; they are only tagged as
                // belonging to the SQL*Plus command when one is in progress.
                case TBaseType.cmtdoublehyphen:
                case TBaseType.cmtslashstar:
                case TBaseType.lexspace: {
                    if (insqlpluscmd) {
                        asourcetoken.insqlpluscmd = true;
                    }
                    break;
                }

                case TBaseType.lexnewline: {
                    if (insqlpluscmd) {
                        // A newline ends the SQL*Plus command ...
                        insqlpluscmd = false;
                        isvalidplace = true;

                        // ... unless the last solid token on the line was "-", in which
                        // case the command carries on and the newline belongs to it.
                        if (continuesqlplusatnewline) {
                            insqlpluscmd = true;
                            isvalidplace = false;
                            asourcetoken.insqlpluscmd = true;
                        }

                        if (!insqlpluscmd) {
                            currentCmdType = ESqlPlusCmd.spcUnknown;
                        }
                    }

                    // Newline right after a semicolon: a new command may start here.
                    if (waitingreturnforsemicolon) {
                        isvalidplace = true;
                    }

                    // Newline right after a candidate '/': the slash recorded in lct is
                    // a SQL*Plus command, not a division operator. lct is non-null here
                    // because it is assigned together with waitingreturnforfloatdiv.
                    if (waitingreturnforfloatdiv) {
                        isvalidplace = true;
                        lct.tokencode = TBaseType.sqlpluscmd;
                        // A token typed ttslash keeps its type; only its code changes.
                        if (lct.tokentype != ETokenType.ttslash) {
                            lct.tokentype = ETokenType.ttsqlpluscmd;
                        }
                    }

                    if (countLines(asourcetoken.toString()) > 1) {
                        // There is a line after select, so spool is the right place to start a sqlplus command
                        isvalidplace = true;
                    }

                    flexer.insqlpluscmd = insqlpluscmd;
                    break;
                }

                default: {
                    // Solid token
                    // Save semicolon flag before clearing: slash after semicolon on
                    // the same line (e.g. "END; /") should be a SQL*Plus delimiter,
                    // not division.
                    boolean prevWasSemicolon = waitingreturnforsemicolon;
                    continuesqlplusatnewline = false;
                    waitingreturnforsemicolon = false;
                    waitingreturnforfloatdiv = false;

                    if (insqlpluscmd) {
                        asourcetoken.insqlpluscmd = true;
                        // "-" requests continuation; the flag survives only if no other
                        // solid token follows before the newline (it is reset above).
                        if (asourcetoken.toString().equalsIgnoreCase("-")) {
                            continuesqlplusatnewline = true;
                        }
                    } else {
                        if (asourcetoken.tokentype == ETokenType.ttsemicolon) {
                            waitingreturnforsemicolon = true;
                        }

                        // Remember a '/' that could be a SQL*Plus delimiter; the decision
                        // is made when the following newline (or end of input) is seen.
                        if ((asourcetoken.tokentype == ETokenType.ttslash)
                                && (isvalidplace || prevWasSemicolon || (isValidPlaceForDivToSqlplusCmd(sourcetokenlist, asourcetoken.posinlist)))) {
                            lct = asourcetoken;
                            waitingreturnforfloatdiv = true;
                        }

                        currentCmdType = TSqlplusCmdStatement.searchCmd(asourcetoken.toString(), asourcetoken.nextToken());
                        if (currentCmdType != ESqlPlusCmd.spcUnknown) {
                            if (isvalidplace) {
                                // The token before the current position must not end with
                                // a space/tab for this to count as a real command.
                                TSourceToken lnbreak = null;
                                boolean aRealSqlplusCmd = true;
                                if (sourcetokenlist.curpos > 0) {
                                    lnbreak = sourcetokenlist.get(sourcetokenlist.curpos - 1);
                                    aRealSqlplusCmd = !spaceAtTheEndOfReturnToken(lnbreak.toString());
                                }

                                if (aRealSqlplusCmd) {
                                    // Keep the original code so it can be restored later
                                    // (see the '.' special case below).
                                    asourcetoken.prevTokenCode = asourcetoken.tokencode;
                                    asourcetoken.tokencode = TBaseType.sqlpluscmd;
                                    if (asourcetoken.tokentype != ETokenType.ttslash) {
                                        asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
                                    }
                                    insqlpluscmd = true;
                                    flexer.insqlpluscmd = insqlpluscmd;
                                }
                            } else if ((asourcetoken.tokencode == TBaseType.rrw_connect) && (sourcetokenlist.returnbeforecurtoken(true))) {
                                // CONNECT preceded by a return is taken as the SQL*Plus
                                // command; it is reverted later if BY/TO follows.
                                asourcetoken.tokencode = TBaseType.sqlpluscmd;
                                if (asourcetoken.tokentype != ETokenType.ttslash) {
                                    asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
                                }
                                insqlpluscmd = true;
                                flexer.insqlpluscmd = insqlpluscmd;
                            } else if (sourcetokenlist.returnbeforecurtoken(true)) {
                                TSourceToken lnbreak = sourcetokenlist.get(sourcetokenlist.curpos - 1);

                                // More than one leading line break and no trailing space/tab
                                // in the preceding token also makes this a command.
                                if ((countLines(lnbreak.toString()) > 1) && (!spaceAtTheEndOfReturnToken(lnbreak.toString()))) {
                                    asourcetoken.tokencode = TBaseType.sqlpluscmd;
                                    if (asourcetoken.tokentype != ETokenType.ttslash) {
                                        asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
                                    }
                                    insqlpluscmd = true;
                                    flexer.insqlpluscmd = insqlpluscmd;
                                }
                            }
                        }
                    }

                    // Any solid token consumes the "valid place"; only a newline restores it.
                    isvalidplace = false;

                    // Oracle-specific keyword handling (inline to match legacy behavior)
                    if (prevst != null) {
                        if (prevst.tokencode == TBaseType.rrw_inner) {
                            // INNER not followed by JOIN is demoted to an identifier.
                            if (asourcetoken.tokencode != flexer.getkeywordvalue("JOIN")) {
                                prevst.tokencode = TBaseType.ident;
                            }
                        } else if ((prevst.tokencode == TBaseType.rrw_not)
                                && (asourcetoken.tokencode == flexer.getkeywordvalue("DEFERRABLE"))) {
                            // NOT followed by DEFERRABLE: NOT gets the NOT_DEFERRABLE code.
                            prevst.tokencode = flexer.getkeywordvalue("NOT_DEFERRABLE");
                        }
                    }

                    // Remember INNER / NOT so the next solid token can resolve them.
                    if (asourcetoken.tokencode == TBaseType.rrw_inner) {
                        prevst = asourcetoken;
                    } else if (asourcetoken.tokencode == TBaseType.rrw_not) {
                        prevst = asourcetoken;
                    } else {
                        prevst = null;
                    }

                    // Oracle keyword transformations that rely on prev token state
                    if ((asourcetoken.tokencode == flexer.getkeywordvalue("DIRECT_LOAD"))
                            || (asourcetoken.tokencode == flexer.getkeywordvalue("ALL"))) {
                        // FOR directly before DIRECT_LOAD / ALL is recoded to rw_for1.
                        lcprevst = getprevsolidtoken(asourcetoken);
                        if (lcprevst != null) {
                            if (lcprevst.tokencode == TBaseType.rrw_for)
                                lcprevst.tokencode = TBaseType.rw_for1;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_dense_rank) {
                        // KEEP found by a backward search (range -2) before DENSE_RANK is recoded.
                        TSourceToken stKeep = asourcetoken.searchToken(TBaseType.rrw_keep, -2);
                        if (stKeep != null) {
                            stKeep.tokencode = TBaseType.rrw_keep_before_dense_rank;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_full) {
                        // FULL preceded by MATCH: the FULL token itself is recoded.
                        TSourceToken stMatch = asourcetoken.searchToken(TBaseType.rrw_match, -1);
                        if (stMatch != null) {
                            asourcetoken.tokencode = TBaseType.RW_FULL2;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_join) {
                        // JOIN: recode a preceding FULL, otherwise a NATURAL found within range -4.
                        TSourceToken stFull = asourcetoken.searchToken(TBaseType.rrw_full, -1);
                        if (stFull != null) {
                            stFull.tokencode = TBaseType.RW_FULL2;
                        } else {
                            TSourceToken stNatural = asourcetoken.searchToken(TBaseType.rrw_natural, -4);
                            if (stNatural != null) {
                                stNatural.tokencode = TBaseType.RW_NATURAL2;
                            }
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_outer) {
                        // OUTER: recode a preceding FULL.
                        TSourceToken stFull = asourcetoken.searchToken(TBaseType.rrw_full, -1);
                        if (stFull != null) {
                            stFull.tokencode = TBaseType.RW_FULL2;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_is) {
                        // IS / AS / OID: a TYPE found within range -2 is recoded to rrw_type2.
                        TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2);
                        if (stType != null) {
                            stType.tokencode = TBaseType.rrw_type2;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_as) {
                        TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2);
                        if (stType != null) {
                            stType.tokencode = TBaseType.rrw_type2;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_oid) {
                        TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2);
                        if (stType != null) {
                            stType.tokencode = TBaseType.rrw_type2;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_type) {
                        // TYPE becomes rrw_type2 when preceded by DROP, OF, CREATE,
                        // REPLACE or '%'. Each later check runs only while the code is
                        // still rrw_type, so the first match wins.
                        TSourceToken stPrev;
                        stPrev = asourcetoken.searchToken(TBaseType.rrw_drop, -1);
                        if (stPrev != null) {
                            asourcetoken.tokencode = TBaseType.rrw_type2;
                        }
                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
                            stPrev = asourcetoken.searchToken(TBaseType.rrw_of, -1);
                            if (stPrev != null) {
                                asourcetoken.tokencode = TBaseType.rrw_type2;
                            }
                        }
                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
                            stPrev = asourcetoken.searchToken(TBaseType.rrw_create, -1);
                            if (stPrev != null) {
                                asourcetoken.tokencode = TBaseType.rrw_type2;
                            }
                        }
                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
                            stPrev = asourcetoken.searchToken(TBaseType.rrw_replace, -1);
                            if (stPrev != null) {
                                asourcetoken.tokencode = TBaseType.rrw_type2;
                            }
                        }
                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
                            stPrev = asourcetoken.searchToken('%', -1);
                            if (stPrev != null) {
                                asourcetoken.tokencode = TBaseType.rrw_type2;
                            }
                        }
                    } else if ((asourcetoken.tokencode == TBaseType.rrw_by) || (asourcetoken.tokencode == TBaseType.rrw_to)) {
                        // "connect by" / "connect to": the CONNECT that was classified as
                        // a SQL*Plus command is turned back into a keyword and the whole
                        // SQL*Plus state is cleared.
                        lcprevst = getprevsolidtoken(asourcetoken);
                        if (lcprevst != null) {
                            if ((lcprevst.tokencode == TBaseType.sqlpluscmd) && (lcprevst.toString().equalsIgnoreCase("connect"))) {
                                lcprevst.tokencode = TBaseType.rrw_connect;
                                lcprevst.tokentype = ETokenType.ttkeyword;
                                flexer.insqlpluscmd = false;

                                continuesqlplusatnewline = false;
                                waitingreturnforsemicolon = false;
                                waitingreturnforfloatdiv = false;
                                isvalidplace = false;
                                insqlpluscmd = false;
                            }
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_with) {
                        // "start with": same reversal for a START classified as a command.
                        lcprevst = getprevsolidtoken(asourcetoken);
                        if (lcprevst != null) {
                            if ((lcprevst.tokencode == TBaseType.sqlpluscmd) && (lcprevst.toString().equalsIgnoreCase("start"))) {
                                lcprevst.tokencode = TBaseType.rrw_start;
                                lcprevst.tokentype = ETokenType.ttkeyword;
                                flexer.insqlpluscmd = false;

                                continuesqlplusatnewline = false;
                                waitingreturnforsemicolon = false;
                                waitingreturnforfloatdiv = false;
                                isvalidplace = false;
                                insqlpluscmd = false;
                            }
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_set) {
                        // "[IS | NOT] A SET": when SET follows a token whose text is "a"
                        // and that is preceded by NOT or IS, both A and SET get dedicated codes.
                        lcprevst = getprevsolidtoken(asourcetoken);
                        if (lcprevst != null) {
                            if (lcprevst.getAstext().equalsIgnoreCase("a")) {
                                TSourceToken lcpp = getprevsolidtoken(lcprevst);
                                if (lcpp != null) {
                                    if ((lcpp.tokencode == TBaseType.rrw_not) || (lcpp.tokencode == TBaseType.rrw_is)) {
                                        lcprevst.tokencode = TBaseType.rrw_oracle_a_in_aset;
                                        asourcetoken.tokencode = TBaseType.rrw_oracle_set_in_aset;
                                    }
                                }
                            }
                        }
                    }

                    break;
                }
            }

            // Get next token
            asourcetoken = getanewsourcetoken();
            if (asourcetoken != null) {
                yychar = asourcetoken.tokencode;

                // Handle special case: dot after SQL*Plus commands
                if ((asourcetoken.tokencode == '.') && (getprevsolidtoken(asourcetoken) != null)
                        && ((currentCmdType == ESqlPlusCmd.spcAppend)
                        || (currentCmdType == ESqlPlusCmd.spcChange) || (currentCmdType == ESqlPlusCmd.spcInput)
                        || (currentCmdType == ESqlPlusCmd.spcList) || (currentCmdType == ESqlPlusCmd.spcRun))) {
                    // a.ent_rp_usr_id is not a real sqlplus command
                    TSourceToken lcprevst2 = getprevsolidtoken(asourcetoken);
                    lcprevst2.insqlpluscmd = false;
                    // Restore the code saved when the token was reclassified; fall back
                    // to a plain identifier when nothing was saved.
                    if (lcprevst2.prevTokenCode != 0) {
                        lcprevst2.tokencode = lcprevst2.prevTokenCode;
                    } else {
                        lcprevst2.tokencode = TBaseType.ident;
                    }

                    flexer.insqlpluscmd = false;
                    continuesqlplusatnewline = false;
                    waitingreturnforsemicolon = false;
                    waitingreturnforfloatdiv = false;
                    isvalidplace = false;
                    insqlpluscmd = false;
                }
            } else {
                // No more tokens: force the loop to terminate.
                yychar = 0;

                if (waitingreturnforfloatdiv) {
                    // / at the end of line treat as sqlplus command
                    lct.tokencode = TBaseType.sqlpluscmd;
                    if (lct.tokentype != ETokenType.ttslash) {
                        lct.tokentype = ETokenType.ttsqlpluscmd;
                    }
                }
            }

            // Input ended while an INNER was still pending: no JOIN can follow, so it
            // is demoted to an identifier.
            if ((yychar == 0) && (prevst != null)) {
                if (prevst.tokencode == TBaseType.rrw_inner) {
                    prevst.tokencode = TBaseType.ident;
                }
            }
        }
    }

    // ========== Helper Methods for Tokenization ==========
    // These methods support Oracle-specific tokenization logic

    /**
     * Count number of newlines in a string.
1853 * 1854 * @param s string to analyze 1855 * @return number of line breaks (LF or CR) 1856 */ 1857 private int countLines(String s) { 1858 int pos = 0, lf = 0, cr = 0; 1859 1860 while (pos < s.length()) { 1861 if (s.charAt(pos) == '\r') { 1862 cr++; 1863 pos++; 1864 continue; 1865 } 1866 if (s.charAt(pos) == '\n') { 1867 lf++; 1868 pos++; 1869 continue; 1870 } 1871 1872 if (s.charAt(pos) == ' ') { 1873 pos++; 1874 continue; 1875 } 1876 break; 1877 } 1878 1879 if (lf >= cr) return lf; 1880 else return cr; 1881 } 1882 1883 /** 1884 * Check if return token ends with space or tab. 1885 * 1886 * @param s token text 1887 * @return true if ends with space/tab 1888 */ 1889 private boolean spaceAtTheEndOfReturnToken(String s) { 1890 if (s == null) return false; 1891 if (s.length() == 0) return false; 1892 1893 return ((s.charAt(s.length() - 1) == ' ') || (s.charAt(s.length() - 1) == '\t')); 1894 } 1895 1896 /** 1897 * Determine if forward slash should be treated as SQL*Plus command delimiter. 1898 * <p> 1899 * Oracle uses '/' as both division operator and SQL*Plus block delimiter. 1900 * This method disambiguates by checking if the '/' appears at the beginning 1901 * of a line (after a return token without trailing whitespace). 
1902 * 1903 * @param pstlist token list 1904 * @param pPos position of '/' token 1905 * @return true if '/' should be SQL*Plus command 1906 */ 1907 private boolean isValidPlaceForDivToSqlplusCmd(TSourceTokenList pstlist, int pPos) { 1908 boolean ret = false; 1909 1910 if ((pPos <= 0) || (pPos > pstlist.size() - 1)) return ret; 1911 1912 // Token directly before div must be ttreturn without space appending it 1913 gudusoft.gsqlparser.TSourceToken lcst = pstlist.get(pPos - 1); 1914 if (lcst.tokentype != gudusoft.gsqlparser.ETokenType.ttreturn) { 1915 return ret; 1916 } 1917 1918 if (!(lcst.getAstext().charAt(lcst.getAstext().length() - 1) == ' ')) { 1919 ret = true; 1920 } 1921 1922 return ret; 1923 } 1924 1925 /** 1926 * Get previous non-whitespace token. 1927 * 1928 * @param ptoken current token 1929 * @return previous solid token, or null 1930 */ 1931 private gudusoft.gsqlparser.TSourceToken getprevsolidtoken(gudusoft.gsqlparser.TSourceToken ptoken) { 1932 gudusoft.gsqlparser.TSourceToken ret = null; 1933 TSourceTokenList lctokenlist = ptoken.container; 1934 1935 if (lctokenlist != null) { 1936 if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) { 1937 if (!( 1938 (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttwhitespace) 1939 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttreturn) 1940 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttsimplecomment) 1941 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttbracketedcomment) 1942 )) { 1943 ret = lctokenlist.get(ptoken.posinlist - 1); 1944 } else { 1945 ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false); 1946 } 1947 } 1948 } 1949 return ret; 1950 } 1951}