001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.TCustomLexer; 006import gudusoft.gsqlparser.TCustomParser; 007import gudusoft.gsqlparser.TCustomSqlStatement; 008import gudusoft.gsqlparser.TLexerOceanbaseoracle; 009import gudusoft.gsqlparser.TParserOceanbaseoracle; 010import gudusoft.gsqlparser.TParserOceanbaseoracleplsql; 011import gudusoft.gsqlparser.TSourceToken; 012import gudusoft.gsqlparser.TSourceTokenList; 013import gudusoft.gsqlparser.TStatementList; 014import gudusoft.gsqlparser.TSyntaxError; 015import gudusoft.gsqlparser.EFindSqlStateType; 016import gudusoft.gsqlparser.ESqlPlusCmd; 017import gudusoft.gsqlparser.ETokenType; 018import gudusoft.gsqlparser.ETokenStatus; 019import gudusoft.gsqlparser.ESqlStatementType; 020import gudusoft.gsqlparser.EErrorType; 021import gudusoft.gsqlparser.stmt.oracle.TSqlplusCmdStatement; 022import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 023import gudusoft.gsqlparser.sqlcmds.ISqlCmds; 024import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 025import gudusoft.gsqlparser.compiler.TContext; 026import gudusoft.gsqlparser.sqlenv.TSQLEnv; 027import gudusoft.gsqlparser.compiler.TGlobalScope; 028import gudusoft.gsqlparser.compiler.TFrame; 029import gudusoft.gsqlparser.resolver.TSQLResolver; 030import gudusoft.gsqlparser.TLog; 031import gudusoft.gsqlparser.TGSqlParser; 032import gudusoft.gsqlparser.compiler.TASTEvaluator; 033import java.util.Stack; 034 035import java.io.BufferedReader; 036import java.util.ArrayList; 037import java.util.List; 038 039/** 040 * OceanBase user tenant — Oracle compatibility mode parser (Phase 3 fork). 041 * 042 * <p>This class is a verbatim fork of {@link OracleSqlParser} with class 043 * names rewritten to point at the forked grammar artifacts under 044 * {@code gsp_java_parser/.../parser/oceanbaseoracle/}. 
The structural 045 * intent is identical to the Oracle parser; what differs is: 046 * 047 * <ul> 048 * <li>It uses the forked {@link gudusoft.gsqlparser.TLexerOceanbaseoracle}, 049 * {@link gudusoft.gsqlparser.TParserOceanbaseoracle}, and 050 * {@link gudusoft.gsqlparser.TParserOceanbaseoracleplsql} (generated 051 * from the forked {@code lzlexoceanbaseoracle.l}, 052 * {@code lzyaccoceanbaseoracle.y}, and 053 * {@code lzyaccoceanbaseoracleplsql.y} sources).</li> 054 * <li>It registers as {@link EDbVendor#dbvoceanbase} so downstream 055 * components (sqlcmds, IdentifierProfile, the various TSQLEnv 056 * EnumMaps) consult OceanBase rules rather than Oracle rules.</li> 057 * <li>It is selected by {@link OceanBaseSqlParser} when the active 058 * {@link gudusoft.gsqlparser.EOBTenantMode} is 059 * {@link gudusoft.gsqlparser.EOBTenantMode#ORACLE}.</li> 060 * </ul> 061 * 062 * <p>Phase 3 ships this fork at zero divergence from the Oracle base 063 * grammar — same shift/reduce conflicts (60 s/r + 2 r/r in SQL parser), 064 * same token positions, same statement coverage including full PL/SQL 065 * (packages, procedures, functions, triggers, anonymous blocks, 066 * SQL*Plus commands, hierarchical query, MERGE, MODEL clause). 067 * OceanBase-specific Oracle-mode extensions (hints, partition 068 * extensions, tenant DDL, outlines, tablegroups) arrive incrementally 069 * in Phase 4 via additive edits to the {@code .y} sources. Each Phase 4 070 * addition is tracked in {@code oceanbaseoracle/FORK_DIVERGENCE.md}. 071 * 072 * <p>Token alignment between {@code lzyaccoceanbaseoracle.y} and 073 * {@code lzyaccoceanbaseoracleplsql.y} is mandatory and enforced by 074 * {@code oceanbaseoracle/check_token_alignment.sh}. Both files MUST 075 * declare identical tokens at identical positions or the SQL lexer and 076 * the PL/SQL parser disagree silently — see CLAUDE.md and the 077 * Phase 0 plan for the rationale. 
078 * 079 * <p>Backporting upstream Oracle grammar fixes is documented in the 080 * Phase 0 plan ({@code gsp_java_core/doc/oceanbase/PHASE0_REPORT.md}) 081 * and the per-fork {@code oceanbaseoracle/REGEN.md}. 082 * 083 * @see OracleSqlParser the unmodified base parser this class was forked from 084 * @see OceanBaseSqlParser the mode-routing adapter that selects this parser 085 * @see gudusoft.gsqlparser.TLexerOceanbaseoracle forked lexer 086 * @see gudusoft.gsqlparser.TParserOceanbaseoracle forked SQL parser 087 * @see gudusoft.gsqlparser.TParserOceanbaseoracleplsql forked PL/SQL parser 088 * @since 4.0.1.4 089 */ 090public class OceanBaseOracleSqlParser extends AbstractSqlParser { 091 092 /** 093 * Construct Oracle SQL parser. 094 * <p> 095 * Configures the parser for Oracle database with default delimiters: 096 * <ul> 097 * <li>SQL statements: semicolon (;)</li> 098 * <li>PL/SQL blocks: forward slash (/)</li> 099 * </ul> 100 * <p> 101 * Following the original TGSqlParser pattern, the lexer and parsers are 102 * created once in the constructor and reused for all parsing operations. 103 * This avoids unnecessary object allocation overhead since the parser 104 * is not thread-safe and designed for single-use per instance. 105 */ 106 public OceanBaseOracleSqlParser() { 107 // Phase 3 fork: register as dbvoceanbase even though the lexer/parser 108 // are forked from Oracle. AST nodes built via this parser report 109 // dbvoceanbase via the NodeFactory back-reference fixup in 110 // TGSqlParser.doDelegatedRawParse(); the vendor passed here is the 111 // one consulted by IdentifierProfile and the various EnumMaps in 112 // TSQLEnv. 113 super(EDbVendor.dbvoceanbase); 114 this.delimiterChar = '/'; // PL/SQL delimiter 115 this.defaultDelimiterStr = ";"; // SQL delimiter 116 117 // Pre-initialize sqlcmds to an Oracle-family resolver so the lazy 118 // init in AbstractSqlParser.extractRawStatements() (at 119 // sqlcmds==null check) does NOT default to TSqlCmdsOceanbase. 
120 // Oracle-specific PL/SQL block boundary detection requires Oracle 121 // command rules; the dbvoceanbase factory entry returns 122 // TSqlCmdsOceanbase which is a TSqlCmdsMysql subclass and cannot 123 // recognize BEGIN/END blocks. The splitter's internal vendor field 124 // reports dbvoracle in this case; that does not affect AST node 125 // identity, which comes from TGSqlParser.dbVendor (still 126 // dbvoceanbase) via the NodeFactory back-reference fixup in 127 // doDelegatedRawParse(). 128 // 129 // Phase 4 Batch 6: use TSqlCmdsOceanbaseOracle so the splitter 130 // recognizes OceanBase-specific tablegroup DDL boundaries that 131 // base Oracle does not know about. Inherits all Oracle command 132 // surface verbatim via super.initializeCommands() in the subclass. 133 this.sqlcmds = new gudusoft.gsqlparser.sqlcmds.TSqlCmdsOceanbaseOracle(); 134 135 // Create lexer once - will be reused for all parsing operations 136 // (matches original TGSqlParser constructor pattern at line 1033) 137 this.flexer = new TLexerOceanbaseoracle(); 138 this.flexer.delimiterchar = this.delimiterChar; 139 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 140 141 // Set parent's lexer reference for shared tokenization logic 142 this.lexer = this.flexer; 143 144 // Create parsers once - will be reused for all parsing operations 145 // Token list will be set/updated when parsing begins 146 // (matches original TGSqlParser constructor pattern at lines 1036-1040) 147 this.fparser = new TParserOceanbaseoracle(null); 148 this.fplsqlparser = new TParserOceanbaseoracleplsql(null); 149 this.fparser.lexer = this.flexer; 150 this.fplsqlparser.lexer = this.flexer; 151 152 // NOTE: sourcetokenlist and sqlstatements are initialized in AbstractSqlParser constructor 153 } 154 155 // ========== Tokenization State (used during tokenization) ========== 156 // These instance variables are used during the tokenization process 157 // and are set up at the beginning of tokenization 158 159 /** 
The Oracle lexer used for tokenization */ 160 public TLexerOceanbaseoracle flexer; // Package-accessible for TGSqlParser integration 161 162 // NOTE: sourcetokenlist moved to AbstractSqlParser (inherited) 163 164 /** Optional callback for token processing (can be null) */ 165 private Object tokenHandle; // TTokenCallback interface - keeping as Object for now 166 167 // State variables for tokenization (set during dooraclesqltexttotokenlist()) 168 private boolean continuesqlplusatnewline; 169 private boolean waitingreturnforsemicolon; 170 private boolean waitingreturnforfloatdiv; 171 private boolean isvalidplace; 172 private boolean insqlpluscmd; 173 174 // ========== Statement Parsing State (used during statement parsing) ========== 175 // These instance variables are used during the statement parsing process 176 177 // NOTE: The following fields moved to AbstractSqlParser (inherited): 178 // - sqlcmds (ISqlCmds) 179 // - sqlstatements (TStatementList) 180 // - parserContext (ParserContext) 181 182 /** Current statement being built */ 183 private TCustomSqlStatement gcurrentsqlstatement; 184 185 /** SQL parser (for regular SQL statements) */ 186 private TParserOceanbaseoracle fparser; 187 188 /** PL/SQL parser (for PL/SQL blocks) */ 189 private TParserOceanbaseoracleplsql fplsqlparser; 190 191 // Note: Global context and frame stack fields inherited from AbstractSqlParser: 192 // - protected TContext globalContext 193 // - protected TSQLEnv sqlEnv 194 // - protected Stack<TFrame> frameStack 195 // - protected TFrame globalFrame 196 197 // ========== Enums for State Machine ========== 198 // These enums are used by the dooraclegetrawsqlstatements state machine 199 200 enum stored_procedure_status {start,is_as,body,bodyend,end, cursor_declare}; 201 enum stored_procedure_type {function,procedure,package_spec,package_body, block_with_begin,block_with_declare, 202 create_trigger,create_library,cursor_in_package_spec,others}; 203 204 static final int 
stored_procedure_nested_level = 1024; 205 206 // ========== AbstractSqlParser Abstract Methods Implementation ========== 207 208 /** 209 * Return the Oracle lexer instance. 210 * <p> 211 * The lexer is created once in the constructor and reused for all 212 * parsing operations. This method simply returns the existing instance, 213 * matching the original TGSqlParser pattern where the lexer is created 214 * once and reset before each use. 215 * 216 * @param context parser context (not used, lexer already created) 217 * @return the Oracle lexer instance created in constructor 218 */ 219 @Override 220 protected TCustomLexer getLexer(ParserContext context) { 221 // Return existing lexer instance (created in constructor) 222 // No need to create new instance - matches original TGSqlParser pattern 223 return this.flexer; 224 } 225 226 /** 227 * Return the Oracle SQL parser instance with updated token list. 228 * <p> 229 * The parser is created once in the constructor and reused for all 230 * parsing operations. This method updates the token list and returns 231 * the existing instance, matching the original TGSqlParser pattern. 232 * 233 * @param context parser context (not used, parser already created) 234 * @param tokens source token list to parse 235 * @return the Oracle SQL parser instance created in constructor 236 */ 237 @Override 238 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 239 // Update token list for reused parser instance 240 this.fparser.sourcetokenlist = tokens; 241 return this.fparser; 242 } 243 244 /** 245 * Return the Oracle PL/SQL parser instance with updated token list. 246 * <p> 247 * Oracle needs a secondary parser (TParserOceanbaseoracleplsql) for PL/SQL blocks 248 * (procedures, functions, packages, triggers, anonymous blocks). 249 * <p> 250 * The parser is created once in the constructor and reused for all 251 * parsing operations. 
This method updates the token list and returns 252 * the existing instance, matching the original TGSqlParser pattern. 253 * 254 * @param context parser context (not used, parser already created) 255 * @param tokens source token list to parse 256 * @return the Oracle PL/SQL parser instance created in constructor 257 */ 258 @Override 259 protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) { 260 // Update token list for reused parser instance 261 this.fplsqlparser.sourcetokenlist = tokens; 262 return this.fplsqlparser; 263 } 264 265 /** 266 * Call Oracle-specific tokenization logic. 267 * <p> 268 * Delegates to dooraclesqltexttotokenlist which handles Oracle's 269 * specific keyword recognition, SQL*Plus commands, forward slash 270 * disambiguation, and token generation. 271 */ 272 @Override 273 protected void tokenizeVendorSql() { 274 dooraclesqltexttotokenlist(); 275 } 276 277 /** 278 * Post-tokenization: merge ${...} template variable tokens into single IDENT tokens. 279 * Template syntax like ${if(len(X) == 0, "", "...")} is used by BI tools. 
280 */ 281 @Override 282 protected void doAfterTokenize(TSourceTokenList tokens) { 283 super.doAfterTokenize(tokens); 284 mergeTemplateVariableTokens(tokens); 285 } 286 287 private void mergeTemplateVariableTokens(TSourceTokenList tokens) { 288 for (int i = 0; i < tokens.size() - 1; i++) { 289 TSourceToken dollar = tokens.get(i); 290 291 // Match either bare '$' (self-char) or '$IDENT' like $P, $X (identifier starting with $) 292 boolean isDollarChar = (dollar.tokencode == '$'); 293 boolean isDollarIdent = (dollar.tokencode == TBaseType.ident 294 && dollar.astext != null && dollar.astext.startsWith("$")); 295 if (!isDollarChar && !isDollarIdent) continue; 296 297 // Find next non-whitespace token — for $IDENT pattern, require immediate '{' (no whitespace) 298 int braceIdx = i + 1; 299 if (isDollarChar) { 300 while (braceIdx < tokens.size() && tokens.get(braceIdx).tokentype == ETokenType.ttwhitespace) { 301 braceIdx++; 302 } 303 } 304 if (braceIdx >= tokens.size() || tokens.get(braceIdx).tokencode != '{') continue; 305 306 // Found ${ pattern — find matching } with depth tracking 307 int depth = 1; 308 int endIdx = braceIdx + 1; 309 boolean isComplex = false; 310 while (endIdx < tokens.size() && depth > 0) { 311 int code = tokens.get(endIdx).tokencode; 312 if (code == '{') depth++; 313 else if (code == '}') depth--; 314 else if (code == '(' || code == ',' || code == '\'' || code == '"') isComplex = true; 315 if (depth > 0) endIdx++; 316 } 317 if (depth != 0) continue; // unclosed 318 319 // Build merged token text 320 StringBuilder sb = new StringBuilder(); 321 for (int j = i; j <= endIdx; j++) { 322 sb.append(tokens.get(j).astext); 323 } 324 325 if (isComplex && isDollarChar) { 326 // Complex template starting with bare $ like ${if(len(X)==0,...)} 327 // These expand to SQL fragments (e.g., AND clauses) at runtime, 328 // so convert to whitespace to let parser skip them entirely. 
329 for (int j = i; j <= endIdx; j++) { 330 tokens.get(j).tokentype = ETokenType.ttwhitespace; 331 tokens.get(j).tokencode = TBaseType.lexspace; 332 } 333 } else { 334 // Simple template like ${NAME}, or JasperReports $P{VAR}/$X{IN,COL,PARAM} 335 // These expand to single values/expressions, so merge into IDENT placeholder. 336 dollar.astext = sb.toString(); 337 dollar.tokencode = TBaseType.ident; 338 dollar.tokentype = ETokenType.ttidentifier; 339 // Convert remaining tokens to whitespace so parser skips them even if 340 // tokenstatus is overwritten by statement splitter (tsignoredbygetrawstatement) 341 for (int j = i + 1; j <= endIdx; j++) { 342 tokens.get(j).tokentype = ETokenType.ttwhitespace; 343 tokens.get(j).tokencode = TBaseType.lexspace; 344 } 345 } 346 347 i = endIdx; // skip past merged tokens 348 } 349 } 350 351 /** 352 * Setup Oracle parsers for raw statement extraction. 353 * <p> 354 * Oracle uses dual parsers (SQL + PL/SQL), so we inject sqlcmds and 355 * update token lists for both parsers. 356 */ 357 @Override 358 protected void setupVendorParsersForExtraction() { 359 // Inject sqlcmds into BOTH parsers (SQL + PL/SQL) 360 this.fparser.sqlcmds = this.sqlcmds; 361 this.fplsqlparser.sqlcmds = this.sqlcmds; 362 363 // Update token list for BOTH parsers 364 this.fparser.sourcetokenlist = this.sourcetokenlist; 365 this.fplsqlparser.sourcetokenlist = this.sourcetokenlist; 366 } 367 368 /** 369 * Call Oracle-specific raw statement extraction logic. 370 * <p> 371 * Delegates to dooraclegetrawsqlstatements which handles Oracle's 372 * statement delimiters (semicolon and forward slash). 373 */ 374 @Override 375 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 376 dooraclegetrawsqlstatements(builder); 377 } 378 379 /** 380 * Perform full parsing of statements with syntax checking. 
381 * <p> 382 * This method orchestrates the parsing of all statements by: 383 * <ul> 384 * <li>Using the raw statements passed from AbstractSqlParser.parse()</li> 385 * <li>Initializing SQL and PL/SQL parsers</li> 386 * <li>Creating global context and frame stack</li> 387 * <li>Looping through each raw statement</li> 388 * <li>Calling parsestatement() on each to build AST</li> 389 * <li>Handling error recovery for CREATE TABLE/INDEX</li> 390 * <li>Collecting syntax errors</li> 391 * </ul> 392 * 393 * <p><b>Important:</b> This method does NOT extract raw statements - they are 394 * passed in as a parameter already extracted by {@link #extractRawStatements}. 395 * This eliminates duplicate extraction that was occurring in the old design. 396 * 397 * <p>Extracted from: TGSqlParser.doparse() lines 16903-17026 398 * 399 * @param context parser context 400 * @param parser main SQL parser (TParserOceanbaseoracle) 401 * @param secondaryParser PL/SQL parser (TParserOceanbaseoracleplsql) 402 * @param tokens source token list 403 * @param rawStatements raw statements already extracted (never null) 404 * @return list of fully parsed statements with AST built 405 */ 406 @Override 407 protected TStatementList performParsing(ParserContext context, 408 TCustomParser parser, 409 TCustomParser secondaryParser, 410 TSourceTokenList tokens, 411 TStatementList rawStatements) { 412 // Store references 413 this.fparser = (TParserOceanbaseoracle) parser; 414 this.fplsqlparser = (TParserOceanbaseoracleplsql) secondaryParser; 415 this.sourcetokenlist = tokens; 416 this.parserContext = context; 417 418 // Use the raw statements passed from AbstractSqlParser.parse() 419 // (already extracted - DO NOT re-extract to avoid duplication) 420 this.sqlstatements = rawStatements; 421 422 // Initialize statement parsing infrastructure. We force Oracle 423 // command rules here (NOT vendor=dbvoceanbase, which would route 424 // to TSqlCmdsOceanbase / TSqlCmdsMysql). 
See the constructor for 425 // the rationale. 426 if (this.sqlcmds == null) { 427 this.sqlcmds = SqlCmdsFactory.get(EDbVendor.dbvoracle); 428 } 429 430 // Inject sqlcmds into parsers (required for make_stmt and other methods) 431 this.fparser.sqlcmds = this.sqlcmds; 432 this.fplsqlparser.sqlcmds = this.sqlcmds; 433 434 // Initialize global context for semantic analysis 435 // CRITICAL: When delegated from TGSqlParser, use TGSqlParser's frameStack 436 // so that variables set in statements can be found by other statements 437 if (context != null && context.getGsqlparser() != null) { 438 TGSqlParser gsqlparser = (TGSqlParser) context.getGsqlparser(); 439 this.frameStack = gsqlparser.getFrameStack(); 440 441 // CRITICAL: Set gsqlparser on the NodeFactory - matches TGSqlParser behavior 442 // This is needed for proper AST node creation during parsing 443 // Without this, expression traversal order may differ, causing 444 // dataflow constant ordering issues 445 this.fparser.getNf().setGsqlParser(gsqlparser); 446 this.fplsqlparser.getNf().setGsqlParser(gsqlparser); 447 448 // Create global context if needed 449 this.globalContext = new TContext(); 450 this.sqlEnv = new TSQLEnv(this.vendor) { 451 @Override 452 public void initSQLEnv() { 453 } 454 }; 455 this.globalContext.setSqlEnv(this.sqlEnv, this.sqlstatements); 456 } else { 457 initializeGlobalContext(); 458 } 459 460 // Parse each statement with exception handling for robustness 461 for (int i = 0; i < sqlstatements.size(); i++) { 462 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 463 464 try { 465 stmt.setFrameStack(frameStack); 466 467 // Parse the statement 468 int parseResult = stmt.parsestatement(null, false, context.isOnlyNeedRawParseTree()); 469 470 // Oracle-specific post-processing (overridden hook method) 471 afterStatementParsed(stmt); 472 473 // Handle error recovery for CREATE TABLE/INDEX 474 boolean doRecover = TBaseType.ENABLE_ERROR_RECOVER_IN_CREATE_TABLE; 475 if (doRecover && ((parseResult 
!= 0) || (stmt.getErrorCount() > 0))) { 476 handleCreateTableErrorRecovery(stmt); 477 } 478 479 // Collect syntax errors 480 if ((parseResult != 0) || (stmt.getErrorCount() > 0)) { 481 copyErrorsFromStatement(stmt); 482 } 483 484 } catch (Exception ex) { 485 // Use inherited exception handler from AbstractSqlParser 486 // This provides consistent error handling across all database parsers 487 handleStatementParsingException(stmt, i, ex); 488 continue; 489 } 490 } 491 492 // Clean up frame stack 493 if (globalFrame != null) { 494 globalFrame.popMeFromStack(frameStack); 495 } 496 497 return this.sqlstatements; 498 } 499 500 // Note: initializeGlobalContext() inherited from AbstractSqlParser 501 502 /** 503 * Override to provide Oracle-specific post-processing after statement parsing. 504 * <p> 505 * For Oracle, we check if the statement is PL/SQL and recursively find syntax 506 * errors in nested PL/SQL statements. 507 */ 508 @Override 509 protected void afterStatementParsed(TCustomSqlStatement stmt) { 510 if (stmt.isoracleplsql()) { 511 findAllSyntaxErrorsInPlsql(stmt); 512 } 513 } 514 515 /** 516 * Perform Oracle-specific semantic analysis using TSQLResolver. 517 * 518 * <p>This includes: 519 * <ul> 520 * <li>Column-to-table resolution</li> 521 * <li>Dataflow analysis</li> 522 * <li>Reference resolution</li> 523 * <li>Scope resolution</li> 524 * </ul> 525 * 526 * @param context the parser context 527 * @param statements the parsed statements 528 */ 529 @Override 530 protected void performSemanticAnalysis(ParserContext context, TStatementList statements) { 531 if (TBaseType.isEnableResolver() && getSyntaxErrors().isEmpty()) { 532 TSQLResolver resolver = new TSQLResolver(globalContext, statements); 533 resolver.resolve(); 534 } 535 } 536 537 /** 538 * Perform Oracle-specific AST interpretation/evaluation using TASTEvaluator. 539 * 540 * <p>This executes simple SQL statements and evaluates expressions 541 * for static analysis and constant folding. 
542 * 543 * @param context the parser context 544 * @param statements the parsed statements 545 */ 546 @Override 547 protected void performInterpreter(ParserContext context, TStatementList statements) { 548 if (TBaseType.ENABLE_INTERPRETER && getSyntaxErrors().isEmpty()) { 549 TLog.clearLogs(); 550 TGlobalScope interpreterScope = new TGlobalScope(sqlEnv); 551 TLog.enableInterpreterLogOnly(); 552 TASTEvaluator astEvaluator = new TASTEvaluator(statements, interpreterScope); 553 astEvaluator.eval(); 554 } 555 } 556 557 // ========== Raw Statement Extraction ========== 558 // These methods extract raw SQL statements from tokens without full parsing 559 // Extracted from TGSqlParser.dooraclegetrawsqlstatements() and related methods 560 561 /** 562 * Extract raw Oracle SQL statements from tokenized source. 563 * <p> 564 * This is the main Oracle statement extraction state machine that: 565 * <ul> 566 * <li>Groups tokens into statement boundaries</li> 567 * <li>Identifies statement types (SQL vs PL/SQL, SQL*Plus commands)</li> 568 * <li>Handles nested PL/SQL blocks (procedures, functions, packages, triggers)</li> 569 * <li>Tracks BEGIN/END pairs and other block delimiters</li> 570 * <li>Detects statement terminators (semicolon, forward slash, period)</li> 571 * </ul> 572 * 573 * <p><b>State Machine:</b> Uses 4 main states: 574 * <ul> 575 * <li>{@code stnormal} - Between statements, looking for start of next statement</li> 576 * <li>{@code stsql} - Inside a SQL statement</li> 577 * <li>{@code stsqlplus} - Inside a SQL*Plus command</li> 578 * <li>{@code ststoredprocedure} - Inside a PL/SQL block (procedure/function/package/trigger)</li> 579 * <li>{@code sterror} - Error recovery mode</li> 580 * </ul> 581 * 582 * <p><b>Extracted from:</b> TGSqlParser.dooraclegetrawsqlstatements() (lines 10071-10859) 583 * 584 * <p><b>Design Note:</b> This method now receives a builder to populate with results, 585 * following Option A design where the vendor-specific method focuses on 
parsing logic 586 * while extractRawStatements() handles result construction. 587 * 588 * @param builder the result builder to populate with statements and error information 589 */ 590 private void dooraclegetrawsqlstatements(SqlParseResult.Builder builder) { 591 int waitingEnds[] = new int[stored_procedure_nested_level]; 592 stored_procedure_type sptype[] = new stored_procedure_type[stored_procedure_nested_level]; 593 stored_procedure_status procedure_status[] = new stored_procedure_status[stored_procedure_nested_level]; 594 boolean endBySlashOnly = true; 595 int nestedProcedures = 0, nestedParenthesis = 0; 596 // Flag for CREATE MLE MODULE with AS clause - terminates with / not ; 597 boolean mleModuleWithAs = false; 598 // Flag for WITH FUNCTION/PROCEDURE - track BEGIN/END nesting to handle embedded semicolons 599 boolean withPlsqlDefinition = false; 600 int withPlsqlBeginEndNesting = 0; 601 boolean withPlsqlFoundSelect = false; // True when SELECT has been found after WITH FUNCTION 602 // Track whether the current CTE statement's main SELECT has been found 603 // (i.e., the SELECT after WITH name AS (...) 
at paren level 0) 604 boolean cteMainSelectFound = false; 605 606 if (TBaseType.assigned(sqlstatements)) sqlstatements.clear(); 607 if (!TBaseType.assigned(sourcetokenlist)) { 608 // No tokens available - populate builder with error and return 609 builder.errorCode(1); 610 builder.errorMessage("No source token list available"); 611 builder.sqlStatements(new TStatementList()); 612 return; 613 } 614 615 gcurrentsqlstatement = null; 616 EFindSqlStateType gst = EFindSqlStateType.stnormal; 617 TSourceToken lcprevsolidtoken = null, ast = null; 618 619 // Main tokenization loop 620 for (int i = 0; i < sourcetokenlist.size(); i++) { 621 622 if ((ast != null) && (ast.issolidtoken())) 623 lcprevsolidtoken = ast; 624 625 ast = sourcetokenlist.get(i); 626 sourcetokenlist.curpos = i; 627 628 // Token-specific keyword transformations for Oracle 629 performRawStatementTokenTransformations(ast); 630 631 // State machine processing 632 switch (gst) { 633 case sterror: { 634 if (ast.tokentype == ETokenType.ttsemicolon) { 635 appendToken(gcurrentsqlstatement, ast); 636 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 637 gst = EFindSqlStateType.stnormal; 638 } else { 639 appendToken(gcurrentsqlstatement, ast); 640 } 641 break; 642 } //sterror 643 644 case stnormal: { 645 if ((ast.tokencode == TBaseType.cmtdoublehyphen) 646 || (ast.tokencode == TBaseType.cmtslashstar) 647 || (ast.tokencode == TBaseType.lexspace) 648 || (ast.tokencode == TBaseType.lexnewline) 649 || (ast.tokentype == ETokenType.ttsemicolon)) { 650 if (gcurrentsqlstatement != null) { 651 appendToken(gcurrentsqlstatement, ast); 652 } 653 654 if ((lcprevsolidtoken != null) && (ast.tokentype == ETokenType.ttsemicolon)) { 655 if (lcprevsolidtoken.tokentype == ETokenType.ttsemicolon) { 656 // ;;;; continuous semicolon, treat it as comment 657 ast.tokentype = ETokenType.ttsimplecomment; 658 ast.tokencode = TBaseType.cmtdoublehyphen; 659 } 
660 } 661 662 continue; 663 } 664 665 if (ast.tokencode == TBaseType.sqlpluscmd) { 666 gst = EFindSqlStateType.stsqlplus; 667 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 668 appendToken(gcurrentsqlstatement, ast); 669 continue; 670 } 671 672 // find a token to start sql or plsql mode 673 gcurrentsqlstatement = sqlcmds.issql(ast, gst, gcurrentsqlstatement); 674 675 if (gcurrentsqlstatement != null) { 676 if (gcurrentsqlstatement.isoracleplsql()) { 677 nestedProcedures = 0; 678 gst = EFindSqlStateType.ststoredprocedure; 679 appendToken(gcurrentsqlstatement, ast); 680 681 switch (gcurrentsqlstatement.sqlstatementtype) { 682 case sstplsql_createprocedure: 683 sptype[nestedProcedures] = stored_procedure_type.procedure; 684 break; 685 case sstplsql_createfunction: 686 sptype[nestedProcedures] = stored_procedure_type.function; 687 break; 688 case sstplsql_createpackage: 689 sptype[nestedProcedures] = stored_procedure_type.package_spec; 690 if (ast.searchToken(TBaseType.rrw_body, 5) != null) { 691 sptype[nestedProcedures] = stored_procedure_type.package_body; 692 } 693 break; 694 case sst_plsql_block: 695 sptype[nestedProcedures] = stored_procedure_type.block_with_declare; 696 if (ast.tokencode == TBaseType.rrw_begin) { 697 sptype[nestedProcedures] = stored_procedure_type.block_with_begin; 698 } 699 break; 700 case sstplsql_createtrigger: 701 sptype[nestedProcedures] = stored_procedure_type.create_trigger; 702 break; 703 case sstoraclecreatelibrary: 704 sptype[nestedProcedures] = stored_procedure_type.create_library; 705 break; 706 case sstplsql_createtype_placeholder: 707 gst = EFindSqlStateType.stsql; 708 break; 709 default: 710 sptype[nestedProcedures] = stored_procedure_type.others; 711 break; 712 } 713 714 if (sptype[0] == stored_procedure_type.block_with_declare) { 715 endBySlashOnly = false; 716 procedure_status[0] = stored_procedure_status.is_as; 717 } else if (sptype[0] == stored_procedure_type.block_with_begin) { 718 endBySlashOnly = false; 719 
procedure_status[0] = stored_procedure_status.body; 720 } else if (sptype[0] == stored_procedure_type.procedure) { 721 endBySlashOnly = false; 722 procedure_status[0] = stored_procedure_status.start; 723 } else if (sptype[0] == stored_procedure_type.function) { 724 endBySlashOnly = false; 725 procedure_status[0] = stored_procedure_status.start; 726 } else if (sptype[0] == stored_procedure_type.package_spec) { 727 endBySlashOnly = false; 728 procedure_status[0] = stored_procedure_status.start; 729 } else if (sptype[0] == stored_procedure_type.package_body) { 730 endBySlashOnly = false; 731 procedure_status[0] = stored_procedure_status.start; 732 } else if (sptype[0] == stored_procedure_type.create_trigger) { 733 endBySlashOnly = false; 734 procedure_status[0] = stored_procedure_status.start; 735 } else if (sptype[0] == stored_procedure_type.create_library) { 736 endBySlashOnly = false; 737 procedure_status[0] = stored_procedure_status.bodyend; 738 } else { 739 endBySlashOnly = true; 740 procedure_status[0] = stored_procedure_status.bodyend; 741 } 742 743 if ((ast.tokencode == TBaseType.rrw_begin) 744 || (ast.tokencode == TBaseType.rrw_package) 745 || (ast.searchToken(TBaseType.rrw_package, 4) != null)) { 746 waitingEnds[nestedProcedures] = 1; 747 } 748 } else { 749 gst = EFindSqlStateType.stsql; 750 appendToken(gcurrentsqlstatement, ast); 751 nestedParenthesis = 0; 752 // Check if this is CREATE MLE MODULE with AS clause (JavaScript code) 753 // If AS is found after LANGUAGE JAVASCRIPT, it terminates with / not ; 754 if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstoraclecreatemlemodule) { 755 // Look ahead to see if there's an AS keyword 756 TSourceToken asToken = ast.searchToken(TBaseType.rrw_as, 10); 757 mleModuleWithAs = (asToken != null); 758 } else { 759 mleModuleWithAs = false; 760 } 761 762 // Check if this is WITH FUNCTION/PROCEDURE (Oracle 12c inline PL/SQL) 763 // Need to track BEGIN/END nesting to handle embedded semicolons 764 if 
(ast.tokencode == TBaseType.rrw_with && gcurrentsqlstatement.isctequery) { 765 // Look ahead for FUNCTION or PROCEDURE keyword 766 TSourceToken nextSolid = ast.nextSolidToken(); 767 if (nextSolid != null && (nextSolid.tokencode == TBaseType.rrw_function 768 || nextSolid.tokencode == TBaseType.rrw_procedure)) { 769 withPlsqlDefinition = true; 770 withPlsqlBeginEndNesting = 0; 771 } 772 } 773 } 774 } else { 775 //error token found 776 this.syntaxErrors.add(new TSyntaxError(ast.getAstext(), ast.lineNo, (ast.columnNo < 0 ? 0 : ast.columnNo) 777 , "Error when tokenize", EErrorType.spwarning, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, null, ast.posinlist)); 778 779 ast.tokentype = ETokenType.tttokenlizererrortoken; 780 gst = EFindSqlStateType.sterror; 781 782 gcurrentsqlstatement = new TUnknownSqlStatement(vendor); 783 gcurrentsqlstatement.sqlstatementtype = ESqlStatementType.sstinvalid; 784 appendToken(gcurrentsqlstatement, ast); 785 } 786 787 break; 788 } // stnormal 789 790 case stsqlplus: { 791 if (ast.insqlpluscmd) { 792 appendToken(gcurrentsqlstatement, ast); 793 } else { 794 gst = EFindSqlStateType.stnormal; //this token must be newline, 795 appendToken(gcurrentsqlstatement, ast); // so add it here 796 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 797 } 798 799 break; 800 }//case stsqlplus 801 802 case stsql: { 803 // For WITH FUNCTION/PROCEDURE, track BEGIN/END nesting and when SELECT is found 804 if (withPlsqlDefinition) { 805 if (ast.tokencode == TBaseType.rrw_begin) { 806 withPlsqlBeginEndNesting++; 807 } else if (ast.tokencode == TBaseType.rrw_end) { 808 withPlsqlBeginEndNesting--; 809 if (withPlsqlBeginEndNesting < 0) withPlsqlBeginEndNesting = 0; 810 } else if (ast.tokencode == TBaseType.rrw_select && withPlsqlBeginEndNesting == 0) { 811 // Found SELECT after all function definitions are done 812 withPlsqlFoundSelect = true; 813 } 814 } 815 816 // For CREATE MLE MODULE 
with AS clause, don't terminate on semicolon 817 // The JavaScript code may contain semicolons; wait for / to terminate 818 // For WITH FUNCTION/PROCEDURE, don't terminate on semicolon until SELECT is found 819 // (the semicolons in function body and after END are part of the function definition) 820 boolean skipSemicolonTermination = mleModuleWithAs || (withPlsqlDefinition && !withPlsqlFoundSelect); 821 if (ast.tokentype == ETokenType.ttsemicolon && !skipSemicolonTermination) { 822 gst = EFindSqlStateType.stnormal; 823 appendToken(gcurrentsqlstatement, ast); 824 gcurrentsqlstatement.semicolonended = ast; 825 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 826 mleModuleWithAs = false; // Reset flag 827 withPlsqlDefinition = false; // Reset WITH FUNCTION flag 828 withPlsqlBeginEndNesting = 0; 829 cteMainSelectFound = false; 830 withPlsqlFoundSelect = false; 831 continue; 832 } 833 834 if (sourcetokenlist.sqlplusaftercurtoken()) //most probably is / cmd 835 { 836 gst = EFindSqlStateType.stnormal; 837 appendToken(gcurrentsqlstatement, ast); 838 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 839 mleModuleWithAs = false; // Reset flag 840 continue; 841 } 842 843 if (ast.tokencode == '(') nestedParenthesis++; 844 if (ast.tokencode == ')') { 845 nestedParenthesis--; 846 if (nestedParenthesis < 0) nestedParenthesis = 0; 847 } 848 849 Boolean findNewStmt = false; 850 TCustomSqlStatement lcStmt = null; 851 // Check for new statement: CREATE TABLE (original), or SELECT inside a non-CTE SELECT 852 boolean shouldCheckNewStmt = false; 853 if ((nestedParenthesis == 0) && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstcreatetable)) { 854 shouldCheckNewStmt = true; 855 } else if ((nestedParenthesis == 0) && (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstselect) 856 && 
(ast.tokencode == TBaseType.rrw_select || ast.tokencode == TBaseType.rrw_with)) { 857 // Check if current statement is a CTE (starts with WITH) 858 boolean isCteContext = false; 859 for (int si = 0; si < gcurrentsqlstatement.sourcetokenlist.size(); si++) { 860 TSourceToken st = gcurrentsqlstatement.sourcetokenlist.get(si); 861 if (st.tokentype == ETokenType.ttwhitespace || st.tokentype == ETokenType.ttreturn 862 || st.tokencode == TBaseType.cmtdoublehyphen || st.tokencode == TBaseType.cmtslashstar) { 863 continue; 864 } 865 if (st.tokencode == TBaseType.rrw_with) { 866 isCteContext = true; 867 } 868 break; 869 } 870 // Don't split if previous token makes this SELECT part of current statement: 871 // - Set operators: UNION, INTERSECT, MINUS, EXCEPT, ALL 872 // - Left paren: (SELECT ...) — SELECT is main query of parenthesized expr 873 boolean suppressSplit = false; 874 if (ast.tokencode == TBaseType.rrw_select && lcprevsolidtoken != null) { 875 int prevCode = lcprevsolidtoken.tokencode; 876 if (prevCode == TBaseType.rrw_union || prevCode == TBaseType.rrw_intersect 877 || prevCode == TBaseType.rrw_minus || prevCode == TBaseType.rrw_except 878 || prevCode == TBaseType.rrw_all 879 || prevCode == '(') { 880 suppressSplit = true; 881 } 882 } 883 if (suppressSplit) { 884 // SELECT is part of current statement — don't split 885 } else if (!isCteContext) { 886 // Non-CTE SELECT: any SELECT/WITH at paren level 0 starts a new statement 887 shouldCheckNewStmt = true; 888 } else if (cteMainSelectFound) { 889 // CTE context: main SELECT already consumed, so this SELECT/WITH 890 // at paren level 0 is a new statement 891 shouldCheckNewStmt = true; 892 } else if (ast.tokencode == TBaseType.rrw_select) { 893 // CTE context: this is the main SELECT after WITH name AS (...) 
894 cteMainSelectFound = true; 895 // Don't split — this SELECT is part of the CTE statement 896 } 897 // If ast is WITH and main SELECT not yet found, it could be another 898 // CTE definition (WITH a AS (...), b AS (...)) — don't split 899 } 900 if (shouldCheckNewStmt) { 901 // For SELECT-after-SELECT/WITH splitting, use stnormal so issql can detect CTE starts. 902 // For CREATE TABLE, preserve original stsql state to avoid false positives 903 // (e.g., INSERT/DELETE keywords in blockchain table clauses). 904 EFindSqlStateType issqlState = (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstselect) 905 ? EFindSqlStateType.stnormal : gst; 906 lcStmt = sqlcmds.issql(ast, issqlState, gcurrentsqlstatement); 907 if (lcStmt != null) { 908 findNewStmt = true; 909 if (lcStmt.sqlstatementtype == ESqlStatementType.sstselect) { 910 TSourceToken prevst = ast.prevSolidToken(); 911 if (gcurrentsqlstatement.sqlstatementtype == ESqlStatementType.sstcreatetable) { 912 // For CREATE TABLE, suppress split when SELECT follows AS/(/): AS (SELECT ...) 
913 if ((prevst.tokencode == TBaseType.rrw_as) || (prevst.tokencode == '(') || (prevst.tokencode == ')')) { 914 findNewStmt = false; 915 } 916 } 917 // For SELECT-after-SELECT/WITH splitting at paren level 0, 918 // no suppression needed — the new SELECT/WITH is a new statement 919 } 920 } 921 } 922 923 if (findNewStmt) { 924 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 925 gcurrentsqlstatement = lcStmt; 926 cteMainSelectFound = false; // Reset for new statement 927 nestedParenthesis = 0; // Reset paren tracking for new statement 928 appendToken(gcurrentsqlstatement, ast); 929 continue; 930 } else 931 appendToken(gcurrentsqlstatement, ast); 932 933 break; 934 }//case stsql 935 936 case ststoredprocedure: { 937 938 if (procedure_status[nestedProcedures] != stored_procedure_status.bodyend) { 939 appendToken(gcurrentsqlstatement, ast); 940 } 941 942 switch (procedure_status[nestedProcedures]) { 943 case cursor_declare: 944 if (ast.tokencode == ';') { 945 nestedProcedures--; 946 if (nestedProcedures < 0) { 947 nestedProcedures = 0; 948 } 949 } 950 break; 951 case start: 952 if ((ast.tokencode == TBaseType.rrw_as) || (ast.tokencode == TBaseType.rrw_is)) { 953 if (sptype[nestedProcedures] != stored_procedure_type.create_trigger) { 954 if ((sptype[0] == stored_procedure_type.package_spec) && (nestedProcedures > 0)) { 955 // when it's a package specification, only top level accept as/is 956 } else { 957 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 958 if (ast.searchToken("language", 1) != null) { 959 if (nestedProcedures == 0) { 960 gst = EFindSqlStateType.stsql; 961 } else { 962 procedure_status[nestedProcedures] = stored_procedure_status.body; 963 nestedProcedures--; 964 } 965 } 966 } 967 } 968 } else if (ast.tokencode == TBaseType.rrw_begin) { 969 if (sptype[nestedProcedures] == stored_procedure_type.create_trigger) { 970 waitingEnds[nestedProcedures]++; 971 
} 972 if (nestedProcedures > 0) { 973 nestedProcedures--; 974 } 975 procedure_status[nestedProcedures] = stored_procedure_status.body; 976 } else if (ast.tokencode == TBaseType.rrw_end) { 977 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures - 1] == 1) 978 && ((sptype[nestedProcedures - 1] == stored_procedure_type.package_body) 979 || (sptype[nestedProcedures - 1] == stored_procedure_type.package_spec))) { 980 nestedProcedures--; 981 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 982 } 983 } else if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) { 984 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures] == 0) 985 && (procedure_status[nestedProcedures - 1] == stored_procedure_status.is_as)) { 986 nestedProcedures--; 987 nestedProcedures++; 988 waitingEnds[nestedProcedures] = 0; 989 procedure_status[nestedProcedures] = stored_procedure_status.start; 990 } 991 } else if (ast.tokencode == TBaseType.rrw_oracle_cursor) { 992 if ((nestedProcedures > 0) && (waitingEnds[nestedProcedures] == 0) 993 && (procedure_status[nestedProcedures - 1] == stored_procedure_status.is_as)) { 994 nestedProcedures--; 995 nestedProcedures++; 996 waitingEnds[nestedProcedures] = 0; 997 procedure_status[nestedProcedures] = stored_procedure_status.cursor_declare; 998 } 999 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) && (ast.tokencode == TBaseType.rrw_declare)) { 1000 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 1001 } else if ((sptype[nestedProcedures] == stored_procedure_type.create_trigger) 1002 && (ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 1003 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 1004 gst = EFindSqlStateType.stnormal; 1005 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1006 1007 gcurrentsqlstatement = new 
TSqlplusCmdStatement(vendor); 1008 appendToken(gcurrentsqlstatement, ast); 1009 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1010 } else if (sptype[nestedProcedures] == stored_procedure_type.create_trigger) { 1011 if (ast.tokencode == TBaseType.rrw_trigger) { 1012 TSourceToken compoundSt = ast.searchToken(TBaseType.rrw_oracle_compound, -1); 1013 if (compoundSt != null) { 1014 procedure_status[nestedProcedures] = stored_procedure_status.body; 1015 waitingEnds[nestedProcedures]++; 1016 } 1017 } 1018 } else if ((sptype[nestedProcedures] == stored_procedure_type.function) 1019 && (ast.tokencode == TBaseType.rrw_teradata_using)) { 1020 if ((ast.searchToken("aggregate", -1) != null) || (ast.searchToken("pipelined", -1) != null)) { 1021 if (nestedProcedures == 0) { 1022 gst = EFindSqlStateType.stsql; 1023 } else { 1024 procedure_status[nestedProcedures] = stored_procedure_status.body; 1025 nestedProcedures--; 1026 } 1027 } 1028 } 1029 break; 1030 case is_as: 1031 if ((ast.tokencode == TBaseType.rrw_procedure) || (ast.tokencode == TBaseType.rrw_function)) { 1032 nestedProcedures++; 1033 if (nestedProcedures > stored_procedure_nested_level - 1) { 1034 gst = EFindSqlStateType.sterror; 1035 nestedProcedures--; 1036 } else { 1037 waitingEnds[nestedProcedures] = 0; 1038 procedure_status[nestedProcedures] = stored_procedure_status.start; 1039 } 1040 } else if (ast.tokencode == TBaseType.rrw_begin) { 1041 if ((nestedProcedures == 0) && 1042 ((sptype[nestedProcedures] == stored_procedure_type.package_body) 1043 || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) { 1044 // top level package begin already counted 1045 } else { 1046 waitingEnds[nestedProcedures]++; 1047 } 1048 procedure_status[nestedProcedures] = stored_procedure_status.body; 1049 } else if (ast.tokencode == TBaseType.rrw_end) { 1050 if ((nestedProcedures == 0) && (waitingEnds[nestedProcedures] == 1) 1051 && 
((sptype[nestedProcedures] == stored_procedure_type.package_body) 1052 || (sptype[nestedProcedures] == stored_procedure_type.package_spec))) { 1053 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 1054 waitingEnds[nestedProcedures]--; 1055 } else { 1056 waitingEnds[nestedProcedures]--; 1057 } 1058 } else if (ast.tokencode == TBaseType.rrw_case) { 1059 if (ast.searchToken(';', 1) == null) { 1060 waitingEnds[nestedProcedures]++; 1061 } 1062 } 1063 break; 1064 case body: 1065 if (ast.tokencode == TBaseType.rrw_begin) { 1066 waitingEnds[nestedProcedures]++; 1067 } else if (ast.tokencode == TBaseType.rrw_if) { 1068 if (ast.searchToken(';', 2) == null) { 1069 waitingEnds[nestedProcedures]++; 1070 } 1071 } else if (ast.tokencode == TBaseType.rrw_case) { 1072 if (ast.searchToken(';', 2) == null) { 1073 if (ast.searchToken(TBaseType.rrw_end, -1) == null) { 1074 waitingEnds[nestedProcedures]++; 1075 } 1076 } 1077 } else if (ast.tokencode == TBaseType.rrw_loop) { 1078 if (!((ast.searchToken(TBaseType.rrw_end, -1) != null) 1079 && (ast.searchToken(';', 2) != null))) { 1080 waitingEnds[nestedProcedures]++; 1081 } 1082 } else if (ast.tokencode == TBaseType.rrw_end) { 1083 waitingEnds[nestedProcedures]--; 1084 if (waitingEnds[nestedProcedures] == 0) { 1085 if (nestedProcedures == 0) { 1086 procedure_status[nestedProcedures] = stored_procedure_status.bodyend; 1087 } else { 1088 nestedProcedures--; 1089 procedure_status[nestedProcedures] = stored_procedure_status.is_as; 1090 } 1091 } 1092 } else if ((waitingEnds[nestedProcedures] == 0) 1093 && (ast.tokentype == ETokenType.ttslash) 1094 && (ast.tokencode == TBaseType.sqlpluscmd)) { 1095 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 1096 gst = EFindSqlStateType.stnormal; 1097 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1098 1099 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 1100 appendToken(gcurrentsqlstatement, 
ast); 1101 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1102 } 1103 break; 1104 case bodyend: 1105 if ((ast.tokentype == ETokenType.ttslash) && (ast.tokencode == TBaseType.sqlpluscmd)) { 1106 // TPlsqlStatementParse(asqlstatement).TerminatorToken := ast; 1107 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 1108 gst = EFindSqlStateType.stnormal; 1109 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1110 1111 //make / a sqlplus cmd 1112 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 1113 appendToken(gcurrentsqlstatement, ast); 1114 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1115 } else if ((ast.tokentype == ETokenType.ttperiod) && (sourcetokenlist.returnaftercurtoken(false)) && (sourcetokenlist.returnbeforecurtoken(false))) { 1116 // single dot at a seperate line 1117 ast.tokenstatus = ETokenStatus.tsignorebyyacc; 1118 gst = EFindSqlStateType.stnormal; 1119 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1120 1121 //make ttperiod a sqlplus cmd 1122 gcurrentsqlstatement = new TSqlplusCmdStatement(vendor); 1123 appendToken(gcurrentsqlstatement, ast); 1124 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1125 } else if ((ast.searchToken(TBaseType.rrw_package, 1) != null) && (!endBySlashOnly)) { 1126 appendToken(gcurrentsqlstatement, ast); 1127 gst = EFindSqlStateType.stnormal; 1128 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1129 } else if ((ast.searchToken(TBaseType.rrw_procedure, 1) != null) && (!endBySlashOnly)) { 1130 
appendToken(gcurrentsqlstatement, ast); 1131 gst = EFindSqlStateType.stnormal; 1132 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1133 } else if ((ast.searchToken(TBaseType.rrw_function, 1) != null) && (!endBySlashOnly)) { 1134 appendToken(gcurrentsqlstatement, ast); 1135 gst = EFindSqlStateType.stnormal; 1136 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1137 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) 1138 && ((ast.searchToken(TBaseType.rrw_package, 4) != null) || (ast.searchToken(TBaseType.rrw_package, 5) != null)) 1139 && (!endBySlashOnly)) { 1140 appendToken(gcurrentsqlstatement, ast); 1141 gst = EFindSqlStateType.stnormal; 1142 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1143 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) 1144 && ((ast.searchToken(TBaseType.rrw_procedure, 4) != null) 1145 || (ast.searchToken(TBaseType.rrw_function, 4) != null) 1146 || (ast.searchToken(TBaseType.rrw_view, 4) != null) 1147 || (ast.searchToken(TBaseType.rrw_oracle_synonym, 4) != null) 1148 || (ast.searchToken(TBaseType.rrw_trigger, 4) != null)) 1149 && (!endBySlashOnly)) { 1150 appendToken(gcurrentsqlstatement, ast); 1151 gst = EFindSqlStateType.stnormal; 1152 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1153 } else if ((ast.searchToken(TBaseType.rrw_create, 1) != null) && (ast.searchToken(TBaseType.rrw_library, 4) != null) && (!endBySlashOnly)) { 1154 appendToken(gcurrentsqlstatement, ast); 1155 gst = EFindSqlStateType.stnormal; 1156 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1157 } else if 
((ast.searchToken(TBaseType.rrw_alter, 1) != null) && (ast.searchToken(TBaseType.rrw_trigger, 2) != null) && (!endBySlashOnly)) { 1158 appendToken(gcurrentsqlstatement, ast); 1159 gst = EFindSqlStateType.stnormal; 1160 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1161 } else if ((ast.searchToken(TBaseType.rrw_select, 1) != null) && (!endBySlashOnly)) { 1162 appendToken(gcurrentsqlstatement, ast); 1163 gst = EFindSqlStateType.stnormal; 1164 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1165 } else if ((ast.searchToken(TBaseType.rrw_call, 1) != null) && (!endBySlashOnly)) { 1166 appendToken(gcurrentsqlstatement, ast); 1167 gst = EFindSqlStateType.stnormal; 1168 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1169 } else if ((ast.searchToken(TBaseType.rrw_commit, 1) != null) && (!endBySlashOnly)) { 1170 appendToken(gcurrentsqlstatement, ast); 1171 gst = EFindSqlStateType.stnormal; 1172 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1173 } else if ((ast.searchToken(TBaseType.rrw_declare, 1) != null) && (!endBySlashOnly)) { 1174 appendToken(gcurrentsqlstatement, ast); 1175 gst = EFindSqlStateType.stnormal; 1176 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1177 } else if ((ast.searchToken(TBaseType.rrw_grant, 1) != null) 1178 && (ast.searchToken(TBaseType.rrw_execute, 2) != null) && (!endBySlashOnly)) { 1179 appendToken(gcurrentsqlstatement, ast); 1180 gst = EFindSqlStateType.stnormal; 1181 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1182 } else 
if ((ast.searchToken(TBaseType.rrw_alter, 1) != null) 1183 && (ast.searchToken(TBaseType.rrw_table, 2) != null) && (!endBySlashOnly)) { 1184 appendToken(gcurrentsqlstatement, ast); 1185 gst = EFindSqlStateType.stnormal; 1186 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, false, builder); 1187 } else { 1188 appendToken(gcurrentsqlstatement, ast); 1189 } 1190 break; 1191 case end: 1192 break; 1193 default: 1194 break; 1195 } 1196 1197 if (ast.tokencode == TBaseType.sqlpluscmd) { 1198 int m = flexer.getkeywordvalue(ast.getAstext()); 1199 if (m != 0) { 1200 ast.tokencode = m; 1201 } else if (ast.tokentype == ETokenType.ttslash) { 1202 ast.tokencode = '/'; 1203 } else { 1204 ast.tokencode = TBaseType.ident; 1205 } 1206 } 1207 1208 final int wrapped_keyword_max_pos = 20; 1209 if ((ast.tokencode == TBaseType.rrw_wrapped) 1210 && (ast.posinlist - gcurrentsqlstatement.sourcetokenlist.get(0).posinlist < wrapped_keyword_max_pos)) { 1211 if (gcurrentsqlstatement instanceof gudusoft.gsqlparser.stmt.TCommonStoredProcedureSqlStatement) { 1212 ((gudusoft.gsqlparser.stmt.TCommonStoredProcedureSqlStatement) gcurrentsqlstatement).setWrapped(true); 1213 } 1214 1215 if (gcurrentsqlstatement instanceof gudusoft.gsqlparser.stmt.oracle.TPlsqlCreatePackage) { 1216 if (ast.prevSolidToken() != null) { 1217 ((gudusoft.gsqlparser.stmt.oracle.TPlsqlCreatePackage) gcurrentsqlstatement) 1218 .setPackageName(fparser.getNf().createObjectNameWithPart(ast.prevSolidToken())); 1219 } 1220 } 1221 } 1222 1223 break; 1224 } //ststoredprocedure 1225 1226 } //switch 1227 }//for 1228 1229 //last statement 1230 if ((gcurrentsqlstatement != null) && 1231 ((gst == EFindSqlStateType.stsqlplus) || (gst == EFindSqlStateType.stsql) || (gst == EFindSqlStateType.ststoredprocedure) || 1232 (gst == EFindSqlStateType.sterror))) { 1233 onRawStatementComplete(this.parserContext, gcurrentsqlstatement, this.fparser, this.fplsqlparser, this.sqlstatements, 
true, builder); 1234 } 1235 1236 // Populate builder with results 1237 builder.sqlStatements(this.sqlstatements); 1238 builder.syntaxErrors(syntaxErrors instanceof ArrayList ? 1239 (ArrayList<TSyntaxError>) syntaxErrors : new ArrayList<>(syntaxErrors)); 1240 builder.errorCode(syntaxErrors.isEmpty() ? 0 : syntaxErrors.size()); 1241 builder.errorMessage(syntaxErrors.isEmpty() ? "" : 1242 String.format("Raw extraction completed with %d error(s)", syntaxErrors.size())); 1243 } 1244 1245 /** 1246 * Handle token transformations during raw statement extraction. 1247 * <p> 1248 * This performs Oracle-specific keyword disambiguation that must happen 1249 * before statement boundary detection. Examples: 1250 * <ul> 1251 * <li>RETURN after WHERE → treat as identifier</li> 1252 * <li>VALUE after BY → mark as value_after_by</li> 1253 * <li>NEW → treat as identifier or constructor based on context</li> 1254 * <li>And many more Oracle-specific cases</li> 1255 * </ul> 1256 * 1257 * @param ast current token being processed 1258 */ 1259 private void performRawStatementTokenTransformations(TSourceToken ast) { 1260 // This method contains the keyword transformation logic from dooraclegetrawsqlstatements 1261 // It's been extracted to keep the main method more readable 1262 1263 if (ast.tokencode == TBaseType.rrw_return) { 1264 TSourceToken stMatch = ast.searchToken(TBaseType.rrw_where, 1); 1265 if (stMatch != null) { 1266 ast.tokencode = TBaseType.ident; 1267 } 1268 } else if (ast.tokencode == TBaseType.rrw_value_oracle) { 1269 TSourceToken stBy = ast.searchToken(TBaseType.rrw_by, -1); 1270 if (stBy != null) { 1271 ast.tokencode = TBaseType.rrw_value_after_by; 1272 } 1273 } else if (ast.tokencode == TBaseType.rrw_new_oracle) { 1274 TSourceToken stRightParen = ast.searchToken(')', -1); 1275 if (stRightParen != null) { 1276 ast.tokencode = TBaseType.ident; 1277 } 1278 TSourceToken stDot = ast.searchToken('.', 1); 1279 if (stDot != null) { 1280 ast.tokencode = TBaseType.ident; 1281 } 
1282 1283 TSourceToken stNext = ast.searchTokenAfterObjectName(); 1284 stDot = ast.searchToken('.', 1); 1285 if ((stDot == null) && (stNext != null) && (stNext.tokencode == '(')) { 1286 ast.tokencode = TBaseType.rrw_oracle_new_constructor; 1287 } 1288 } else if (ast.tokencode == TBaseType.rrw_chr_oracle) { 1289 TSourceToken stLeftParen = ast.searchToken('(', 1); 1290 if (stLeftParen == null) { 1291 ast.tokencode = TBaseType.ident; 1292 } 1293 } else if (ast.tokencode == TBaseType.rrw_log_oracle) { 1294 TSourceToken stNext = ast.searchToken(TBaseType.rrw_errors_oracle, 1); 1295 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_view, -1); 1296 if (stPrev == null) { 1297 stPrev = ast.searchToken(TBaseType.rrw_oracle_supplemental, -1); 1298 } 1299 if ((stNext == null) && (stPrev == null)) { 1300 ast.tokencode = TBaseType.ident; 1301 } 1302 } else if (ast.tokencode == TBaseType.rrw_delete) { 1303 TSourceToken stPrev = ast.searchToken('.', -1); 1304 if (stPrev != null) { 1305 ast.tokencode = TBaseType.ident; 1306 } 1307 } else if (ast.tokencode == TBaseType.rrw_partition) { 1308 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_add, -1); 1309 if (stPrev != null) { 1310 stPrev.tokencode = TBaseType.rrw_add_p; 1311 } 1312 } else if (ast.tokencode == TBaseType.rrw_oracle_column) { 1313 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_oracle_modify, -1); 1314 if (stPrev != null) { 1315 ast.tokencode = TBaseType.rrw_oracle_column_after_modify; 1316 } 1317 } else if (ast.tokencode == TBaseType.rrw_oracle_apply) { 1318 TSourceToken stPrev = ast.searchToken(TBaseType.rrw_outer, -1); 1319 if (stPrev != null) { 1320 stPrev.tokencode = TBaseType.ORACLE_OUTER2; 1321 } 1322 } else if (ast.tokencode == TBaseType.rrw_oracle_subpartition) { 1323 TSourceToken stNext = ast.searchToken("(", 2); 1324 if (stNext != null) { 1325 TSourceToken st1 = ast.nextSolidToken(); 1326 if (st1.toString().equalsIgnoreCase("template")) { 1327 // don't change, keep as RW_SUBPARTITION 1328 } else { 
1329 ast.tokencode = TBaseType.rrw_oracle_subpartition_tablesample; 1330 } 1331 } 1332 } else if (ast.tokencode == TBaseType.rrw_primary) { 1333 TSourceToken stNext = ast.searchToken("key", 1); 1334 if (stNext == null) { 1335 ast.tokencode = TBaseType.ident; 1336 } 1337 } else if (ast.tokencode == TBaseType.rrw_oracle_offset) { 1338 TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_row, 2); 1339 if (stNext == null) { 1340 stNext = ast.searchToken(TBaseType.rrw_oracle_rows, 2); 1341 } 1342 if (stNext != null) { 1343 ast.tokencode = TBaseType.rrw_oracle_offset_row; 1344 } 1345 } else if (ast.tokencode == TBaseType.rrw_translate) { 1346 TSourceToken stNext = ast.searchToken("(", 2); 1347 if (stNext == null) { 1348 ast.tokencode = TBaseType.ident; 1349 } 1350 } else if (ast.tokencode == TBaseType.rrw_constraint) { 1351 TSourceToken stNext = ast.nextSolidToken(); 1352 if (stNext == null) { 1353 ast.tokencode = TBaseType.ident; 1354 } else { 1355 if (stNext.tokencode != TBaseType.ident) { 1356 ast.tokencode = TBaseType.ident; 1357 } 1358 } 1359 } else if (ast.tokencode == TBaseType.rrw_oracle_without) { 1360 TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_count, 1); 1361 if (stNext != null) { 1362 ast.tokencode = TBaseType.rrw_oracle_without_before_count; 1363 } 1364 } else if (ast.tokencode == TBaseType.rrw_bulk) { 1365 TSourceToken stNext = ast.searchToken(TBaseType.rrw_oracle_collect, 1); 1366 if (stNext == null) { 1367 ast.tokencode = TBaseType.ident; 1368 } 1369 } else if (ast.tokencode == TBaseType.rrw_oracle_model) { 1370 TSourceToken stNext = ast.nextSolidToken(); 1371 if (stNext != null) { 1372 switch (stNext.toString().toUpperCase()) { 1373 case "RETURN": 1374 case "REFERENCE": 1375 case "IGNORE": 1376 case "KEEP": 1377 case "UNIQUE": 1378 case "PARTITION": 1379 case "DIMENSION": 1380 case "MEASURES": 1381 case "RULES": 1382 ast.tokencode = TBaseType.rrw_oracle_model_in_model_clause; 1383 break; 1384 default: 1385 ; 1386 } 1387 } 1388 } 1389 
} 1390 1391 private void appendToken(TCustomSqlStatement statement, TSourceToken token) { 1392 if (statement == null || token == null) { 1393 return; 1394 } 1395 token.stmt = statement; 1396 statement.sourcetokenlist.add(token); 1397 } 1398 1399 // ========== Error Handling and Recovery ========== 1400 1401 /** 1402 * Find all syntax errors in PL/SQL statements recursively. 1403 * Extracted from TGSqlParser.findAllSyntaxErrorsInPlsql(). 1404 */ 1405 private void findAllSyntaxErrorsInPlsql(TCustomSqlStatement psql) { 1406 if (psql.getErrorCount() > 0) { 1407 copyErrorsFromStatement(psql); 1408 } 1409 1410 for (int k = 0; k < psql.getStatements().size(); k++) { 1411 findAllSyntaxErrorsInPlsql(psql.getStatements().get(k)); 1412 } 1413 } 1414 1415 /** 1416 * Handle error recovery for CREATE TABLE/INDEX statements. 1417 * Oracle allows table properties that may not be fully parsed. 1418 * This method marks unparseable properties as SQL*Plus commands to skip them. 1419 * 1420 * <p>Extracted from TGSqlParser.doparse() lines 16916-16971 1421 */ 1422 private void handleCreateTableErrorRecovery(TCustomSqlStatement stmt) { 1423 if (((stmt.sqlstatementtype == ESqlStatementType.sstcreatetable) || 1424 (stmt.sqlstatementtype == ESqlStatementType.sstcreateindex)) && 1425 (!TBaseType.c_createTableStrictParsing)) { 1426 1427 // Find the closing parenthesis of table definition 1428 int nested = 0; 1429 boolean isIgnore = false, isFoundIgnoreToken = false; 1430 TSourceToken firstIgnoreToken = null; 1431 1432 for (int k = 0; k < stmt.sourcetokenlist.size(); k++) { 1433 TSourceToken st = stmt.sourcetokenlist.get(k); 1434 1435 if (isIgnore) { 1436 if (st.issolidtoken() && (st.tokencode != ';')) { 1437 isFoundIgnoreToken = true; 1438 if (firstIgnoreToken == null) { 1439 firstIgnoreToken = st; 1440 } 1441 } 1442 if (st.tokencode != ';') { 1443 st.tokencode = TBaseType.sqlpluscmd; 1444 } 1445 continue; 1446 } 1447 1448 if (st.tokencode == (int) ')') { 1449 nested--; 1450 if (nested == 0) { 
1451 // Check if next token is "AS ( SELECT" 1452 boolean isSelect = false; 1453 TSourceToken st1 = st.searchToken(TBaseType.rrw_as, 1); 1454 if (st1 != null) { 1455 TSourceToken st2 = st.searchToken((int) '(', 2); 1456 if (st2 != null) { 1457 TSourceToken st3 = st.searchToken(TBaseType.rrw_select, 3); 1458 isSelect = (st3 != null); 1459 } 1460 } 1461 if (!isSelect) isIgnore = true; 1462 } 1463 } 1464 1465 if ((st.tokencode == (int) '(') || (st.tokencode == TBaseType.left_parenthesis_2)) { 1466 nested++; 1467 } 1468 } 1469 1470 // Verify it's a valid Oracle table property 1471 if ((firstIgnoreToken != null) && 1472 (!TBaseType.searchOracleTablePros(firstIgnoreToken.toString()))) { 1473 // Not a valid property, keep the error 1474 isFoundIgnoreToken = false; 1475 } 1476 1477 // Retry parsing if we found ignoreable properties 1478 if (isFoundIgnoreToken) { 1479 stmt.clearError(); 1480 stmt.parsestatement(null, false); 1481 } 1482 } 1483 } 1484 1485 /** 1486 * Copy syntax errors from a statement to our error list. 1487 * Extracted from TGSqlParser.copyerrormsg(). 1488 */ 1489 1490 @Override 1491 public String toString() { 1492 return "OceanBaseOracleSqlParser{vendor=" + vendor + "}"; 1493 } 1494 1495 // ========== Main Oracle Tokenization ========== 1496 // Core tokenization logic extracted from TGSqlParser.dooraclesqltexttotokenlist() 1497 1498 /** 1499 * Perform Oracle-specific tokenization with SQL*Plus command detection. 
* <p>
     * Pulls tokens one at a time from {@code getanewsourcetoken()}, appends each
     * to {@code sourcetokenlist}, and rewrites token codes/types in place. The
     * rules implemented here include:
     * <ul>
     *   <li>SQL*Plus command detection (SPOOL, SET, START, etc.)</li>
     *   <li>Forward slash disambiguation (division vs PL/SQL delimiter)</li>
     *   <li>Oracle-specific keyword transformations (INNER, TYPE, FULL, etc.)</li>
     *   <li>Context-dependent token code modifications</li>
     * </ul>
     *
     * <p><b>State Machine:</b> Uses 5 boolean flags to track tokenization state
     * (all are reset at the top of this method):
     * <ul>
     *   <li>{@code insqlpluscmd} - Currently inside SQL*Plus command</li>
     *   <li>{@code isvalidplace} - Valid place to start SQL*Plus command</li>
     *   <li>{@code waitingreturnforfloatdiv} - Slash seen, waiting for newline</li>
     *   <li>{@code waitingreturnforsemicolon} - Semicolon seen, waiting for newline</li>
     *   <li>{@code continuesqlplusatnewline} - SQL*Plus command continues to next line</li>
     * </ul>
     *
     * <p>The loop ends when {@code getanewsourcetoken()} returns {@code null} or a
     * token whose {@code tokencode} is not positive. If the very first token is
     * {@code null} the method returns without touching {@code sourcetokenlist}.
     *
     * <p><b>Extracted from:</b> TGSqlParser.dooraclesqltexttotokenlist() (lines 3931-4298)
     *
     * <p>NOTE(review): nothing in this method throws explicitly; any exception
     * would have to come from the helpers it calls - confirm before documenting
     * a {@code @throws} contract.
     */
    private void dooraclesqltexttotokenlist() {
        // Initialize state machine for SQL*Plus command detection
        insqlpluscmd = false;
        isvalidplace = true;
        waitingreturnforfloatdiv = false;
        waitingreturnforsemicolon = false;
        continuesqlplusatnewline = false;

        // Result of the most recent SQL*Plus command lookup; consulted after the
        // switch when the following token turns out to be a dot.
        ESqlPlusCmd currentCmdType = ESqlPlusCmd.spcUnknown;

        // lct:    the last '/' token that may still become a SQL*Plus command
        // prevst: the previous solid token, remembered only when it is INNER or NOT
        TSourceToken lct = null, prevst = null;

        TSourceToken asourcetoken, lcprevst;
        int yychar;

        asourcetoken = getanewsourcetoken();
        if (asourcetoken == null) return;
        yychar = asourcetoken.tokencode;

        while (yychar > 0) {
            sourcetokenlist.add(asourcetoken);

            switch (yychar) {
                // Comments and spaces never change state; they only inherit the
                // "inside a SQL*Plus command" mark.
                case TBaseType.cmtdoublehyphen:
                case TBaseType.cmtslashstar:
                case TBaseType.lexspace: {
                    if (insqlpluscmd) {
                        asourcetoken.insqlpluscmd = true;
                    }
                    break;
                }

                case TBaseType.lexnewline: {
                    if (insqlpluscmd) {
                        // A newline ends the current SQL*Plus command ...
                        insqlpluscmd = false;
                        isvalidplace = true;

                        // ... unless the last solid token was the "-" continuation
                        // marker, in which case the command carries on.
                        if (continuesqlplusatnewline) {
                            insqlpluscmd = true;
                            isvalidplace = false;
                            asourcetoken.insqlpluscmd = true;
                        }

                        if (!insqlpluscmd) {
                            currentCmdType = ESqlPlusCmd.spcUnknown;
                        }
                    }

                    if (waitingreturnforsemicolon) {
                        isvalidplace = true;
                    }

                    if (waitingreturnforfloatdiv) {
                        // The pending '/' was the last solid token on its line:
                        // commit it as a SQL*Plus command.
                        isvalidplace = true;
                        lct.tokencode = TBaseType.sqlpluscmd;
                        // lct is only ever assigned a ttslash token below, so this
                        // guard normally leaves the token type untouched.
                        if (lct.tokentype != ETokenType.ttslash) {
                            lct.tokentype = ETokenType.ttsqlpluscmd;
                        }
                    }

                    if (countLines(asourcetoken.toString()) > 1) {
                        // The newline token covers more than one line break, e.g. there is
                        // a line after select, so spool is the right place to start a sqlplus command
                        isvalidplace = true;
                    }

                    // Keep the lexer's view of the SQL*Plus state in sync.
                    flexer.insqlpluscmd = insqlpluscmd;
                    break;
                }

                default: {
                    // Solid token
                    // Save semicolon flag before clearing: slash after semicolon on
                    // the same line (e.g. "END; /") should be a SQL*Plus delimiter,
                    // not division.
                    boolean prevWasSemicolon = waitingreturnforsemicolon;
                    continuesqlplusatnewline = false;
                    waitingreturnforsemicolon = false;
                    waitingreturnforfloatdiv = false;

                    if (insqlpluscmd) {
                        asourcetoken.insqlpluscmd = true;
                        // "-" inside a SQL*Plus command requests continuation on the
                        // next line; any later solid token clears the request again.
                        if (asourcetoken.toString().equalsIgnoreCase("-")) {
                            continuesqlplusatnewline = true;
                        }
                    } else {
                        if (asourcetoken.tokentype == ETokenType.ttsemicolon) {
                            waitingreturnforsemicolon = true;
                        }

                        // Remember a '/' that might be a block delimiter; it is only
                        // converted once a newline or end of input follows it.
                        if ((asourcetoken.tokentype == ETokenType.ttslash)
                                && (isvalidplace || prevWasSemicolon || (isValidPlaceForDivToSqlplusCmd(sourcetokenlist, asourcetoken.posinlist)))) {
                            lct = asourcetoken;
                            waitingreturnforfloatdiv = true;
                        }

                        currentCmdType = TSqlplusCmdStatement.searchCmd(asourcetoken.toString(), asourcetoken.nextToken());
                        if (currentCmdType != ESqlPlusCmd.spcUnknown) {
                            if (isvalidplace) {
                                TSourceToken lnbreak = null;
                                boolean aRealSqlplusCmd = true;
                                if (sourcetokenlist.curpos > 0) {
                                    // A preceding token that ends in space/tab means the
                                    // word is indented, so it is not taken as a command.
                                    lnbreak = sourcetokenlist.get(sourcetokenlist.curpos - 1);
                                    aRealSqlplusCmd = !spaceAtTheEndOfReturnToken(lnbreak.toString());
                                }

                                if (aRealSqlplusCmd) {
                                    // Only this branch records the original token code; the
                                    // dot special case after the switch relies on it.
                                    asourcetoken.prevTokenCode = asourcetoken.tokencode;
                                    asourcetoken.tokencode = TBaseType.sqlpluscmd;
                                    if (asourcetoken.tokentype != ETokenType.ttslash) {
                                        asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
                                    }
                                    insqlpluscmd = true;
                                    flexer.insqlpluscmd = insqlpluscmd;
                                }
                            } else if ((asourcetoken.tokencode == TBaseType.rrw_connect) && (sourcetokenlist.returnbeforecurtoken(true))) {
                                // CONNECT right after a line break is taken as the SQL*Plus
                                // command; a following BY/TO reverts this further down.
                                asourcetoken.tokencode = TBaseType.sqlpluscmd;
                                if (asourcetoken.tokentype != ETokenType.ttslash) {
                                    asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
                                }
                                insqlpluscmd = true;
                                flexer.insqlpluscmd = insqlpluscmd;
                            } else if (sourcetokenlist.returnbeforecurtoken(true)) {
                                TSourceToken lnbreak = sourcetokenlist.get(sourcetokenlist.curpos - 1);

                                // Accept the command only after more than one line break
                                // with no trailing space/tab before the word.
                                if ((countLines(lnbreak.toString()) > 1) && (!spaceAtTheEndOfReturnToken(lnbreak.toString()))) {
                                    asourcetoken.tokencode = TBaseType.sqlpluscmd;
                                    if (asourcetoken.tokentype != ETokenType.ttslash) {
                                        asourcetoken.tokentype = ETokenType.ttsqlpluscmd;
                                    }
                                    insqlpluscmd = true;
                                    flexer.insqlpluscmd = insqlpluscmd;
                                }
                            }
                        }
                    }

                    // Any solid token closes the window for starting a SQL*Plus command.
                    isvalidplace = false;

                    // Oracle-specific keyword handling (inline to match legacy behavior)
                    if (prevst != null) {
                        if (prevst.tokencode == TBaseType.rrw_inner) {
                            // INNER not followed by JOIN is demoted to an identifier.
                            if (asourcetoken.tokencode != flexer.getkeywordvalue("JOIN")) {
                                prevst.tokencode = TBaseType.ident;
                            }
                        } else if ((prevst.tokencode == TBaseType.rrw_not)
                                && (asourcetoken.tokencode == flexer.getkeywordvalue("DEFERRABLE"))) {
                            // NOT directly before DEFERRABLE gets the NOT_DEFERRABLE code.
                            prevst.tokencode = flexer.getkeywordvalue("NOT_DEFERRABLE");
                        }
                    }

                    // Remember INNER / NOT for the check above on the next solid token.
                    if (asourcetoken.tokencode == TBaseType.rrw_inner) {
                        prevst = asourcetoken;
                    } else if (asourcetoken.tokencode == TBaseType.rrw_not) {
                        prevst = asourcetoken;
                    } else {
                        prevst = null;
                    }

                    // Oracle keyword transformations that rely on prev token state
                    if ((asourcetoken.tokencode == flexer.getkeywordvalue("DIRECT_LOAD"))
                            || (asourcetoken.tokencode == flexer.getkeywordvalue("ALL"))) {
                        // FOR directly before DIRECT_LOAD / ALL is recoded as rw_for1.
                        lcprevst = getprevsolidtoken(asourcetoken);
                        if (lcprevst != null) {
                            if (lcprevst.tokencode == TBaseType.rrw_for)
                                lcprevst.tokencode = TBaseType.rw_for1;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_dense_rank) {
                        // KEEP found by the backward search is recoded.
                        TSourceToken stKeep = asourcetoken.searchToken(TBaseType.rrw_keep, -2);
                        if (stKeep != null) {
                            stKeep.tokencode = TBaseType.rrw_keep_before_dense_rank;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_full) {
                        // FULL preceded by MATCH becomes RW_FULL2.
                        TSourceToken stMatch = asourcetoken.searchToken(TBaseType.rrw_match, -1);
                        if (stMatch != null) {
                            asourcetoken.tokencode = TBaseType.RW_FULL2;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_join) {
                        // JOIN: recode a preceding FULL, otherwise a NATURAL found by
                        // the backward search.
                        TSourceToken stFull = asourcetoken.searchToken(TBaseType.rrw_full, -1);
                        if (stFull != null) {
                            stFull.tokencode = TBaseType.RW_FULL2;
                        } else {
                            TSourceToken stNatural = asourcetoken.searchToken(TBaseType.rrw_natural, -4);
                            if (stNatural != null) {
                                stNatural.tokencode = TBaseType.RW_NATURAL2;
                            }
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_outer) {
                        TSourceToken stFull = asourcetoken.searchToken(TBaseType.rrw_full, -1);
                        if (stFull != null) {
                            stFull.tokencode = TBaseType.RW_FULL2;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_is) {
                        // IS / AS / OID: a TYPE found by the backward search is recoded
                        // as rrw_type2 (same rule in the next two branches).
                        TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2);
                        if (stType != null) {
                            stType.tokencode = TBaseType.rrw_type2;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_as) {
                        TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2);
                        if (stType != null) {
                            stType.tokencode = TBaseType.rrw_type2;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_oid) {
                        TSourceToken stType = asourcetoken.searchToken(TBaseType.rrw_type, -2);
                        if (stType != null) {
                            stType.tokencode = TBaseType.rrw_type2;
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_type) {
                        // TYPE itself becomes rrw_type2 when preceded by DROP, OF,
                        // CREATE, REPLACE or '%'; each test runs only while the token
                        // still carries the plain rrw_type code.
                        TSourceToken stPrev;
                        stPrev = asourcetoken.searchToken(TBaseType.rrw_drop, -1);
                        if (stPrev != null) {
                            asourcetoken.tokencode = TBaseType.rrw_type2;
                        }
                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
                            stPrev = asourcetoken.searchToken(TBaseType.rrw_of, -1);
                            if (stPrev != null) {
                                asourcetoken.tokencode = TBaseType.rrw_type2;
                            }
                        }
                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
                            stPrev = asourcetoken.searchToken(TBaseType.rrw_create, -1);
                            if (stPrev != null) {
                                asourcetoken.tokencode = TBaseType.rrw_type2;
                            }
                        }
                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
                            stPrev = asourcetoken.searchToken(TBaseType.rrw_replace, -1);
                            if (stPrev != null) {
                                asourcetoken.tokencode = TBaseType.rrw_type2;
                            }
                        }
                        if (asourcetoken.tokencode == TBaseType.rrw_type) {
                            stPrev = asourcetoken.searchToken('%', -1);
                            if (stPrev != null) {
                                asourcetoken.tokencode = TBaseType.rrw_type2;
                            }
                        }
                    } else if ((asourcetoken.tokencode == TBaseType.rrw_by) || (asourcetoken.tokencode == TBaseType.rrw_to)) {
                        // "connect" already flagged as a SQL*Plus command but followed
                        // by BY/TO: turn it back into the CONNECT keyword and leave
                        // SQL*Plus mode.
                        lcprevst = getprevsolidtoken(asourcetoken);
                        if (lcprevst != null) {
                            if ((lcprevst.tokencode == TBaseType.sqlpluscmd) && (lcprevst.toString().equalsIgnoreCase("connect"))) {
                                lcprevst.tokencode = TBaseType.rrw_connect;
                                lcprevst.tokentype = ETokenType.ttkeyword;
                                flexer.insqlpluscmd = false;

                                continuesqlplusatnewline = false;
                                waitingreturnforsemicolon = false;
                                waitingreturnforfloatdiv = false;
                                isvalidplace = false;
                                insqlpluscmd = false;
                            }
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_with) {
                        // Same rollback for "start" followed by WITH.
                        lcprevst = getprevsolidtoken(asourcetoken);
                        if (lcprevst != null) {
                            if ((lcprevst.tokencode == TBaseType.sqlpluscmd) && (lcprevst.toString().equalsIgnoreCase("start"))) {
                                lcprevst.tokencode = TBaseType.rrw_start;
                                lcprevst.tokentype = ETokenType.ttkeyword;
                                flexer.insqlpluscmd = false;

                                continuesqlplusatnewline = false;
                                waitingreturnforsemicolon = false;
                                waitingreturnforfloatdiv = false;
                                isvalidplace = false;
                                insqlpluscmd = false;
                            }
                        }
                    } else if (asourcetoken.tokencode == TBaseType.rrw_set) {
                        // "IS A SET" / "NOT A SET": give both "A" and "SET" their
                        // dedicated token codes.
                        lcprevst = getprevsolidtoken(asourcetoken);
                        if (lcprevst != null) {
                            if (lcprevst.getAstext().equalsIgnoreCase("a")) {
                                TSourceToken lcpp = getprevsolidtoken(lcprevst);
                                if (lcpp != null) {
                                    if ((lcpp.tokencode == TBaseType.rrw_not) || (lcpp.tokencode == TBaseType.rrw_is)) {
                                        lcprevst.tokencode = TBaseType.rrw_oracle_a_in_aset;
                                        asourcetoken.tokencode = TBaseType.rrw_oracle_set_in_aset;
                                    }
                                }
                            }
                        }
                    }

                    break;
                }
            }

            // Get next token
            asourcetoken = getanewsourcetoken();
            if (asourcetoken != null) {
                yychar = asourcetoken.tokencode;

                // Handle special case: dot after SQL*Plus commands
                if ((asourcetoken.tokencode == '.') && (getprevsolidtoken(asourcetoken) != null)
                        && ((currentCmdType == ESqlPlusCmd.spcAppend)
                        || (currentCmdType == ESqlPlusCmd.spcChange) || (currentCmdType == ESqlPlusCmd.spcInput)
                        || (currentCmdType == ESqlPlusCmd.spcList) || (currentCmdType == ESqlPlusCmd.spcRun))) {
                    // a.ent_rp_usr_id is not a real sqlplus command
                    TSourceToken lcprevst2 = getprevsolidtoken(asourcetoken);
                    lcprevst2.insqlpluscmd = false;
                    // Restore the code saved when the token was flagged; fall back to
                    // a plain identifier when nothing was saved.
                    // NOTE(review): tokentype is not restored here - confirm intended.
                    if (lcprevst2.prevTokenCode != 0) {
                        lcprevst2.tokencode = lcprevst2.prevTokenCode;
                    } else {
                        lcprevst2.tokencode = TBaseType.ident;
                    }

                    flexer.insqlpluscmd = false;
                    continuesqlplusatnewline = false;
                    waitingreturnforsemicolon = false;
                    waitingreturnforfloatdiv = false;
                    isvalidplace = false;
                    insqlpluscmd = false;
                }
            } else {
                // End of input: make the loop condition fail.
                yychar = 0;

                if (waitingreturnforfloatdiv) {
                    // / at the end of line treat as sqlplus command
                    lct.tokencode = TBaseType.sqlpluscmd;
                    if (lct.tokentype != ETokenType.ttslash) {
                        lct.tokentype = ETokenType.ttsqlpluscmd;
                    }
                }
            }

            // A trailing INNER can no longer be followed by JOIN: demote it.
            if ((yychar == 0) && (prevst != null)) {
                if (prevst.tokencode == TBaseType.rrw_inner) {
                    prevst.tokencode = TBaseType.ident;
                }
            }
        }
    }

    // ========== Helper Methods for Tokenization ==========
    // These methods support Oracle-specific tokenization logic

    /**
     * Count number of newlines in a string.
1858 * 1859 * @param s string to analyze 1860 * @return number of line breaks (LF or CR) 1861 */ 1862 private int countLines(String s) { 1863 int pos = 0, lf = 0, cr = 0; 1864 1865 while (pos < s.length()) { 1866 if (s.charAt(pos) == '\r') { 1867 cr++; 1868 pos++; 1869 continue; 1870 } 1871 if (s.charAt(pos) == '\n') { 1872 lf++; 1873 pos++; 1874 continue; 1875 } 1876 1877 if (s.charAt(pos) == ' ') { 1878 pos++; 1879 continue; 1880 } 1881 break; 1882 } 1883 1884 if (lf >= cr) return lf; 1885 else return cr; 1886 } 1887 1888 /** 1889 * Check if return token ends with space or tab. 1890 * 1891 * @param s token text 1892 * @return true if ends with space/tab 1893 */ 1894 private boolean spaceAtTheEndOfReturnToken(String s) { 1895 if (s == null) return false; 1896 if (s.length() == 0) return false; 1897 1898 return ((s.charAt(s.length() - 1) == ' ') || (s.charAt(s.length() - 1) == '\t')); 1899 } 1900 1901 /** 1902 * Determine if forward slash should be treated as SQL*Plus command delimiter. 1903 * <p> 1904 * Oracle uses '/' as both division operator and SQL*Plus block delimiter. 1905 * This method disambiguates by checking if the '/' appears at the beginning 1906 * of a line (after a return token without trailing whitespace). 
1907 * 1908 * @param pstlist token list 1909 * @param pPos position of '/' token 1910 * @return true if '/' should be SQL*Plus command 1911 */ 1912 private boolean isValidPlaceForDivToSqlplusCmd(TSourceTokenList pstlist, int pPos) { 1913 boolean ret = false; 1914 1915 if ((pPos <= 0) || (pPos > pstlist.size() - 1)) return ret; 1916 1917 // Token directly before div must be ttreturn without space appending it 1918 gudusoft.gsqlparser.TSourceToken lcst = pstlist.get(pPos - 1); 1919 if (lcst.tokentype != gudusoft.gsqlparser.ETokenType.ttreturn) { 1920 return ret; 1921 } 1922 1923 if (!(lcst.getAstext().charAt(lcst.getAstext().length() - 1) == ' ')) { 1924 ret = true; 1925 } 1926 1927 return ret; 1928 } 1929 1930 /** 1931 * Get previous non-whitespace token. 1932 * 1933 * @param ptoken current token 1934 * @return previous solid token, or null 1935 */ 1936 private gudusoft.gsqlparser.TSourceToken getprevsolidtoken(gudusoft.gsqlparser.TSourceToken ptoken) { 1937 gudusoft.gsqlparser.TSourceToken ret = null; 1938 TSourceTokenList lctokenlist = ptoken.container; 1939 1940 if (lctokenlist != null) { 1941 if ((ptoken.posinlist > 0) && (lctokenlist.size() > ptoken.posinlist - 1)) { 1942 if (!( 1943 (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttwhitespace) 1944 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttreturn) 1945 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttsimplecomment) 1946 || (lctokenlist.get(ptoken.posinlist - 1).tokentype == gudusoft.gsqlparser.ETokenType.ttbracketedcomment) 1947 )) { 1948 ret = lctokenlist.get(ptoken.posinlist - 1); 1949 } else { 1950 ret = lctokenlist.nextsolidtoken(ptoken.posinlist - 1, -1, false); 1951 } 1952 } 1953 } 1954 return ret; 1955 } 1956}