001package gudusoft.gsqlparser.resolver2; 002 003import gudusoft.gsqlparser.TBaseType; 004import gudusoft.gsqlparser.TCustomSqlStatement; 005import gudusoft.gsqlparser.IRelation; 006import gudusoft.gsqlparser.TLog; 007import gudusoft.gsqlparser.TSourceToken; 008import gudusoft.gsqlparser.TStatementList; 009import gudusoft.gsqlparser.ETableSource; 010import gudusoft.gsqlparser.EDbVendor; 011import gudusoft.gsqlparser.EDbObjectType; 012import gudusoft.gsqlparser.EErrorType; 013import gudusoft.gsqlparser.ESqlClause; 014import gudusoft.gsqlparser.ESqlStatementType; 015import gudusoft.gsqlparser.TSyntaxError; 016import gudusoft.gsqlparser.stmt.dax.TDaxStmt; 017import gudusoft.gsqlparser.stmt.TAlterTableStatement; 018import gudusoft.gsqlparser.stmt.TCreateTableSqlStatement; 019import gudusoft.gsqlparser.stmt.TInsertSqlStatement; 020import gudusoft.gsqlparser.stmt.TUpdateSqlStatement; 021import gudusoft.gsqlparser.stmt.TDeleteSqlStatement; 022import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 023import gudusoft.gsqlparser.compiler.TContext; 024import gudusoft.gsqlparser.nodes.TObjectName; 025import gudusoft.gsqlparser.nodes.TObjectNameList; 026import gudusoft.gsqlparser.nodes.TTable; 027import gudusoft.gsqlparser.nodes.TJoinExpr; 028import gudusoft.gsqlparser.nodes.TParseTreeNode; 029import gudusoft.gsqlparser.nodes.TParseTreeVisitor; 030import gudusoft.gsqlparser.nodes.TResultColumn; 031import gudusoft.gsqlparser.nodes.TResultColumnList; 032import gudusoft.gsqlparser.nodes.TQualifyClause; 033import gudusoft.gsqlparser.nodes.TExpression; 034import gudusoft.gsqlparser.EExpressionType; 035import gudusoft.gsqlparser.resolver2.model.ColumnSource; 036import gudusoft.gsqlparser.resolver2.model.FromScopeIndex; 037import gudusoft.gsqlparser.resolver2.model.ResolutionContext; 038import gudusoft.gsqlparser.resolver2.model.ResolutionResult; 039import gudusoft.gsqlparser.resolver2.model.ResolutionStatistics; 040import gudusoft.gsqlparser.resolver2.ResolutionStatus; 041import 
gudusoft.gsqlparser.resolver2.result.IResolutionResult; 042import gudusoft.gsqlparser.resolver2.result.ResolutionResultImpl; 043import gudusoft.gsqlparser.resolver2.scope.FromScope; 044import gudusoft.gsqlparser.resolver2.scope.GlobalScope; 045import gudusoft.gsqlparser.resolver2.scope.IScope; 046import gudusoft.gsqlparser.resolver2.scope.SelectScope; 047import gudusoft.gsqlparser.resolver2.scope.CTEScope; 048import gudusoft.gsqlparser.resolver2.scope.GroupByScope; 049import gudusoft.gsqlparser.resolver2.scope.HavingScope; 050import gudusoft.gsqlparser.resolver2.scope.OrderByScope; 051import gudusoft.gsqlparser.resolver2.scope.UpdateScope; 052import gudusoft.gsqlparser.resolver2.scope.DeleteScope; 053import gudusoft.gsqlparser.resolver2.namespace.INamespace; 054import gudusoft.gsqlparser.resolver2.namespace.TableNamespace; 055import gudusoft.gsqlparser.resolver2.namespace.SubqueryNamespace; 056import gudusoft.gsqlparser.resolver2.namespace.CTENamespace; 057import gudusoft.gsqlparser.nodes.TCTE; 058import gudusoft.gsqlparser.nodes.TCTEList; 059import gudusoft.gsqlparser.nodes.TUnnestClause; 060import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 061import gudusoft.gsqlparser.resolver2.iterative.ConvergenceDetector; 062import gudusoft.gsqlparser.resolver2.iterative.ResolutionPass; 063import gudusoft.gsqlparser.resolver2.enhancement.NamespaceEnhancer; 064import gudusoft.gsqlparser.resolver2.enhancement.EnhancementResult; 065import gudusoft.gsqlparser.resolver2.enhancement.CollectedColumnRef; 066import gudusoft.gsqlparser.resolver2.metadata.BatchMetadataCollector; 067import gudusoft.gsqlparser.resolver2.context.DatabaseContextTracker; 068import gudusoft.gsqlparser.resolver2.namespace.CTENamespace; 069import gudusoft.gsqlparser.sqlenv.TSQLEnv; 070import gudusoft.gsqlparser.TAttributeNode; 071 072import java.util.ArrayDeque; 073import java.util.ArrayList; 074import java.util.Deque; 075import java.util.HashMap; 076import java.util.HashSet; 077import 
java.util.IdentityHashMap; 078import java.util.List; 079import java.util.Map; 080import java.util.Set; 081 082// ScopeBuilder for visitor-based scope construction 083import gudusoft.gsqlparser.resolver2.ScopeBuilder; 084import gudusoft.gsqlparser.resolver2.ScopeBuildResult; 085 086/** 087 * New SQL Resolver - Phase 2 Enhanced Framework 088 * 089 * This is the main entry point for the new resolution architecture. 090 * Provides improved column-to-table resolution with: 091 * - Clear scope-based name resolution 092 * - Full candidate collection for ambiguous cases 093 * - Confidence-scored inference 094 * - Better tracing and debugging 095 * 096 * Usage: 097 * <pre> 098 * TSQLResolver2 resolver = new TSQLResolver2(context, statements); 099 * boolean success = resolver.resolve(); 100 * ResolutionStatistics stats = resolver.getStatistics(); 101 * </pre> 102 * 103 * Phase 1 capabilities: 104 * - Basic SELECT statement resolution 105 * - Table and subquery namespaces 106 * - Qualified and unqualified column references 107 * - FROM clause scope management 108 * 109 * Phase 2 capabilities: 110 * - JOIN scope handling with nullable semantics 111 * - CTE (WITH clause) resolution 112 * - Iterative resolution framework (auto-converges after first pass if no iteration needed) 113 * 114 * Future phases will add: 115 * - Evidence-based inference 116 * - Star column expansion 117 */ 118public class TSQLResolver2 { 119 120 private final TContext globalContext; 121 private final TStatementList sqlStatements; 122 private final TSQLResolverConfig config; 123 private final ResolutionContext resolutionContext; 124 private final NameResolver nameResolver; 125 126 /** Global scope (root of scope tree) */ 127 private GlobalScope globalScope; 128 129 /** Convergence detector for iterative resolution */ 130 private ConvergenceDetector convergenceDetector; 131 132 /** History of all resolution passes */ 133 private final List<ResolutionPass> passHistory; 134 135 /** 136 * Scope cache for 
iterative resolution. 137 * Maps statements to their scope trees to avoid rebuilding scopes on each pass. 138 * Key: TCustomSqlStatement, Value: SelectScope (or other scope type) 139 */ 140 private final java.util.Map<Object, IScope> statementScopeCache; 141 142 /** 143 * Column-to-Scope mapping for iterative resolution (Principle 1: Scope完全复用). 144 * Built once in Pass 1, reused in Pass 2+ to avoid rebuilding scopes. 145 * Maps each TObjectName (column reference) to the IScope where it should be resolved. 146 */ 147 private final java.util.Map<TObjectName, IScope> columnToScopeMap; 148 149 /** 150 * FromScope index cache for O(1) table/namespace lookups (Performance Optimization B). 151 * Maps FromScope instances to their pre-built indexes. 152 * Built lazily on first access, cleared at the start of each resolve() call. 153 * Uses IdentityHashMap because we need object identity, not equals(). 154 */ 155 private final Map<IScope, FromScopeIndex> fromScopeIndexCache; 156 157 /** 158 * Cache for Teradata NAMED alias lookup. 159 * Maps SELECT statements to their alias index (alias name -> TResultColumn). 160 * Uses IdentityHashMap because we need object identity, not equals(). 161 * Optimization C: Reduces O(cols * select_items) to O(cols) for Teradata. 162 */ 163 private final Map<TSelectSqlStatement, Map<String, TResultColumn>> teradataNamedAliasCache; 164 165 /** 166 * All column references collected during Pass 1 (Principle 1: Scope完全复用). 167 * Used in Pass 2+ to re-resolve names without rebuilding the scope tree. 168 */ 169 private final List<TObjectName> allColumnReferences; 170 171 /** 172 * ScopeBuilder for visitor-based scope construction. 173 * Replaces manual scope building with proper nested scope handling. 174 */ 175 private final ScopeBuilder scopeBuilder; 176 177 /** 178 * Result from ScopeBuilder containing the complete scope tree. 179 * This is populated in Pass 1 and reused in Pass 2+. 
180 */ 181 private ScopeBuildResult scopeBuildResult; 182 183 /** 184 * NamespaceEnhancer for explicit column collection and enhancement. 185 * Handles the explicit namespace enhancement phase between resolution passes. 186 * Columns are collected during resolution and added to namespaces explicitly. 187 */ 188 private NamespaceEnhancer namespaceEnhancer; 189 190 /** 191 * Create resolver with default configuration 192 */ 193 public TSQLResolver2(TContext context, TStatementList statements) { 194 this(context, statements, TSQLResolverConfig.createDefault()); 195 } 196 197 /** 198 * Create resolver with custom configuration 199 */ 200 public TSQLResolver2(TContext context, TStatementList statements, TSQLResolverConfig config) { 201 this.globalContext = context; 202 this.sqlStatements = statements; 203 this.config = config; 204 this.resolutionContext = new ResolutionContext(); 205 this.nameResolver = new NameResolver(config, resolutionContext); 206 this.passHistory = new ArrayList<>(); 207 this.statementScopeCache = new java.util.HashMap<>(); 208 this.columnToScopeMap = new java.util.HashMap<>(); 209 this.fromScopeIndexCache = new IdentityHashMap<>(); 210 this.teradataNamedAliasCache = new IdentityHashMap<>(); 211 this.allColumnReferences = new ArrayList<>(); 212 213 // Initialize ScopeBuilder for visitor-based scope construction 214 this.scopeBuilder = new ScopeBuilder(context, config.getNameMatcher()); 215 // Pass guessColumnStrategy from config for namespace isolation (prevents test side effects) 216 if (config.hasCustomGuessColumnStrategy()) { 217 this.scopeBuilder.setGuessColumnStrategy(config.getGuessColumnStrategy()); 218 } 219 220 // If context is null, try to get TSQLEnv from statements 221 // This allows TSQLEnv to flow from parser.setSqlEnv() through statements 222 if (statements != null && statements.size() > 0) { 223 try { 224 TCustomSqlStatement firstStmt = statements.get(0); 225 if (firstStmt != null && firstStmt.getGlobalScope() != null && 226 
firstStmt.getGlobalScope().getSqlEnv() != null) { 227 this.scopeBuilder.setSqlEnv(firstStmt.getGlobalScope().getSqlEnv()); 228 } 229 } catch (Exception e) { 230 // Silently ignore - SQLEnv is optional enhancement 231 } 232 } 233 234 // Initialize convergence detector for iterative resolution 235 this.convergenceDetector = new ConvergenceDetector( 236 config.getMaxIterations(), 237 config.getStablePassesForConvergence(), 238 config.getMinProgressRate() 239 ); 240 241 // Initialize namespace enhancer for explicit column collection 242 // Debug mode follows the global resolver log setting 243 this.namespaceEnhancer = new NamespaceEnhancer(TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE); 244 } 245 246 /** 247 * Set the TSQLEnv to use for table metadata lookup. 248 * This allows external callers to provide TSQLEnv if automatic detection fails. 249 * 250 * @param sqlEnv the SQL environment containing table metadata 251 */ 252 public void setSqlEnv(gudusoft.gsqlparser.sqlenv.TSQLEnv sqlEnv) { 253 if (scopeBuilder != null) { 254 scopeBuilder.setSqlEnv(sqlEnv); 255 } 256 } 257 258 /** 259 * Get the TSQLEnv used for table metadata lookup. 260 * 261 * @return the SQL environment, or null if not set 262 */ 263 public gudusoft.gsqlparser.sqlenv.TSQLEnv getSqlEnv() { 264 return scopeBuilder != null ? scopeBuilder.getSqlEnv() : null; 265 } 266 267 /** 268 * Get the set of virtual trigger tables (deleted/inserted in SQL Server triggers). 269 * These tables should be excluded from table output since their columns are 270 * resolved to the trigger's target table. 271 * 272 * @return Set of TTable objects that are virtual trigger tables 273 */ 274 public java.util.Set<gudusoft.gsqlparser.nodes.TTable> getVirtualTriggerTables() { 275 return scopeBuilder != null ? scopeBuilder.getVirtualTriggerTables() : java.util.Collections.emptySet(); 276 } 277 278 /** 279 * Get the SQL statements being resolved. 
280 * 281 * @return the list of SQL statements 282 */ 283 public TStatementList getStatements() { 284 return sqlStatements; 285 } 286 287 // Performance timing fields (instance-level for single resolve() call) 288 private long timeScopeBuilder = 0; 289 private long timeNameResolution = 0; 290 private long timeEnhancement = 0; 291 private long timeLegacySync = 0; 292 private long timeOther = 0; 293 294 // Global accumulators for profiling across all resolve() calls 295 private static long globalTimeScopeBuilder = 0; 296 private static long globalTimeNameResolution = 0; 297 private static long globalTimeEnhancement = 0; 298 private static long globalTimeLegacySync = 0; 299 private static long globalTimeOther = 0; 300 private static int globalResolveCount = 0; 301 302 /** 303 * Reset global timing accumulators. 304 */ 305 public static void resetGlobalTimings() { 306 globalTimeScopeBuilder = 0; 307 globalTimeNameResolution = 0; 308 globalTimeEnhancement = 0; 309 globalTimeLegacySync = 0; 310 globalTimeOther = 0; 311 globalResolveCount = 0; 312 // Reset detailed legacy sync timings 313 globalTimeClearLinked = 0; 314 globalTimeFillAttributes = 0; 315 globalTimeSyncColumns = 0; 316 globalTimePopulateOrphans = 0; 317 globalTimeClearHints = 0; 318 } 319 320 /** 321 * Get global performance timing breakdown for profiling across all resolve() calls. 322 * @return formatted timing information 323 */ 324 public static String getGlobalPerformanceTimings() { 325 long total = globalTimeScopeBuilder + globalTimeNameResolution + globalTimeEnhancement + globalTimeLegacySync + globalTimeOther; 326 return String.format( 327 "TSQLResolver2 Global Timings (across %d resolve() calls):\n" + 328 " ScopeBuilder: %d ms (%.1f%%)\n" + 329 " NameResolution: %d ms (%.1f%%)\n" + 330 " Enhancement: %d ms (%.1f%%)\n" + 331 " LegacySync: %d ms (%.1f%%)\n" + 332 " Other: %d ms (%.1f%%)\n" + 333 " Total: %d ms", 334 globalResolveCount, 335 globalTimeScopeBuilder, total > 0 ? 
100.0 * globalTimeScopeBuilder / total : 0, 336 globalTimeNameResolution, total > 0 ? 100.0 * globalTimeNameResolution / total : 0, 337 globalTimeEnhancement, total > 0 ? 100.0 * globalTimeEnhancement / total : 0, 338 globalTimeLegacySync, total > 0 ? 100.0 * globalTimeLegacySync / total : 0, 339 globalTimeOther, total > 0 ? 100.0 * globalTimeOther / total : 0, 340 total); 341 } 342 343 /** 344 * Get performance timing breakdown for profiling. 345 * @return formatted timing information 346 */ 347 public String getPerformanceTimings() { 348 long total = timeScopeBuilder + timeNameResolution + timeEnhancement + timeLegacySync + timeOther; 349 return String.format( 350 "TSQLResolver2 Timings:\n" + 351 " ScopeBuilder: %d ms (%.1f%%)\n" + 352 " NameResolution: %d ms (%.1f%%)\n" + 353 " Enhancement: %d ms (%.1f%%)\n" + 354 " LegacySync: %d ms (%.1f%%)\n" + 355 " Other: %d ms (%.1f%%)\n" + 356 " Total: %d ms", 357 timeScopeBuilder, total > 0 ? 100.0 * timeScopeBuilder / total : 0, 358 timeNameResolution, total > 0 ? 100.0 * timeNameResolution / total : 0, 359 timeEnhancement, total > 0 ? 100.0 * timeEnhancement / total : 0, 360 timeLegacySync, total > 0 ? 100.0 * timeLegacySync / total : 0, 361 timeOther, total > 0 ? 
100.0 * timeOther / total : 0, 362 total); 363 } 364 365 /** 366 * Perform resolution on all SQL statements 367 */ 368 public boolean resolve() { 369 // Reset timing counters 370 timeScopeBuilder = 0; 371 timeNameResolution = 0; 372 timeEnhancement = 0; 373 timeLegacySync = 0; 374 timeOther = 0; 375 376 // Setup logging 377 TLog.clearLogs(); 378 if (!TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 379 TLog.disableLog(); 380 } else { 381 TLog.enableAllLevelLog(); 382 } 383 384 try { 385 logInfo("Starting TSQLResolver2.resolve()"); 386 387 long startTime = System.currentTimeMillis(); 388 389 // Delta 1: Collect metadata from DDL statements if no SQLEnv provided 390 if (getSqlEnv() == null) { 391 collectBatchMetadata(); 392 } 393 394 // Delta 4: Track database context from USE/SET statements 395 trackDatabaseContext(); 396 397 // Phase 1: Build global scope (once for all passes) 398 buildGlobalScope(); 399 400 timeOther += System.currentTimeMillis() - startTime; 401 402 // Phase 2: Perform iterative resolution 403 // (automatically completes after first pass if no second pass is needed) 404 return performIterativeResolution(); 405 406 } catch (Exception e) { 407 logError("Exception in TSQLResolver2.resolve(): " + e.getMessage()); 408 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 409 e.printStackTrace(); 410 } 411 return false; 412 } finally { 413 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 414 TBaseType.dumpLogs(false); 415 } 416 } 417 } 418 419 /** 420 * Perform iterative resolution. 421 * Automatically converges after first pass if no additional passes are needed. 422 * 423 * Architecture: 424 * - Pass 1: Build scope tree + initial name resolution 425 * - Pass 2-N: Reuse scope tree, collect evidence, infer columns, re-resolve names 426 * 427 * This separation allows: 428 * 1. Scopes to accumulate inferred columns across iterations 429 * 2. Later scopes to reference earlier scopes' inferred columns 430 * 3. 
Forward references to be resolved in subsequent passes 431 */ 432 private boolean performIterativeResolution() { 433 logInfo("Performing iterative resolution (max iterations: " + config.getMaxIterations() + ")"); 434 435 int passNumber = 1; 436 ResolutionStatistics previousStats = null; 437 boolean continueIterating = true; 438 boolean scopesBuilt = false; 439 440 while (continueIterating) { 441 logInfo("=== Pass " + passNumber + " ==="); 442 443 // Create a resolution pass 444 ResolutionPass pass = new ResolutionPass(passNumber, previousStats); 445 446 if (passNumber == 1) { 447 // ========== PASS 1: Build scope tree + initial resolution ========== 448 logInfo("Pass 1: Building scope tree using ScopeBuilder and performing initial resolution"); 449 450 // Clear all state for fresh start 451 resolutionContext.clear(); 452 columnToScopeMap.clear(); 453 fromScopeIndexCache.clear(); 454 dmlIndexCache.clear(); 455 teradataNamedAliasCache.clear(); 456 allColumnReferences.clear(); 457 458 // Use ScopeBuilder to build complete scope tree (handles all nesting correctly) 459 long scopeBuilderStart = System.currentTimeMillis(); 460 scopeBuildResult = scopeBuilder.build(sqlStatements); 461 462 // Get global scope from builder 463 globalScope = scopeBuildResult.getGlobalScope(); 464 465 // Copy column references and scope mappings from ScopeBuildResult 466 columnToScopeMap.putAll(scopeBuildResult.getColumnToScopeMap()); 467 allColumnReferences.addAll(scopeBuildResult.getAllColumnReferences()); 468 timeScopeBuilder += System.currentTimeMillis() - scopeBuilderStart; 469 470 logInfo("ScopeBuilder complete: " + scopeBuildResult.getStatistics()); 471 logInfo("Built " + scopeBuildResult.getStatementScopeMap().size() + " SelectScopes"); 472 473 // Initialize NamespaceEnhancer with scope tree (caches star namespaces) 474 namespaceEnhancer.initialize(scopeBuildResult); 475 namespaceEnhancer.startPass(passNumber); 476 477 // Get SET clause target columns that should not be re-resolved 
478 Set<TObjectName> setClauseTargetColumns = scopeBuilder.getSetClauseTargetColumns(); 479 480 // Get INSERT ALL target columns that should not be re-resolved 481 Set<TObjectName> insertAllTargetColumns = scopeBuilder.getInsertAllTargetColumns(); 482 483 // Get MERGE INSERT VALUES columns that need sourceTable restoration after resolution 484 Map<TObjectName, TTable> mergeInsertValuesColumns = scopeBuilder.getMergeInsertValuesColumns(); 485 486 // Perform initial name resolution for all collected columns 487 logInfo("Performing initial name resolution for " + allColumnReferences.size() + " column references"); 488 long nameResStart = System.currentTimeMillis(); 489 for (TObjectName objName : allColumnReferences) { 490 // Skip SET clause target columns - they already have sourceTable correctly set 491 // to the UPDATE target table and should NOT be resolved through star columns 492 if (setClauseTargetColumns.contains(objName)) { 493 continue; 494 } 495 496 // Skip INSERT ALL target columns - they already have sourceTable correctly set 497 // to the INSERT target table and should NOT be resolved against the subquery scope 498 if (insertAllTargetColumns.contains(objName)) { 499 continue; 500 } 501 502 IScope scope = columnToScopeMap.get(objName); 503 if (scope != null) { 504 nameResolver.resolve(objName, scope); 505 506 // Handle USING column priority for JOIN...USING syntax 507 handleUsingColumnResolution(objName); 508 509 // Handle Teradata NAMED alias resolution 510 handleTeradataNamedAliasResolution(objName); 511 handleQualifyClauseAliasResolution(objName); 512 513 // Handle subquery aliased/calculated column resolution 514 // Ensures aliased columns don't incorrectly trace to base tables 515 handleSubqueryAliasedColumnResolution(objName); 516 517 // Collect unresolved references for enhancement 518 collectForEnhancementIfNeeded(objName, scope); 519 } 520 } 521 522 // Restore sourceTable for MERGE INSERT VALUES columns after name resolution. 
523 // Name resolution may have set an AMBIGUOUS resolution (e.g., column 'product' 524 // appears in both target and source tables through the ON clause). In MERGE 525 // semantics, WHEN NOT MATCHED VALUES columns always reference the USING (source) 526 // table. 527 // 528 // For AMBIGUOUS resolution: clear it so getSourceTable() returns the actual field 529 // value (the USING table). AMBIGUOUS means the column was found in both target and 530 // source namespaces, but semantically it must reference the source. 531 // 532 // For EXACT_MATCH resolution: keep it because it contains star column push-down 533 // tracing info (e.g., when USING is a subquery with SELECT *, the resolution 534 // traces the VALUES column to the physical table inside the subquery). 535 for (Map.Entry<TObjectName, TTable> entry : mergeInsertValuesColumns.entrySet()) { 536 TObjectName col = entry.getKey(); 537 TTable usingTable = entry.getValue(); 538 ResolutionResult res = col.getResolution(); 539 if (res != null && res.isAmbiguous()) { 540 col.setResolution(null); 541 } 542 col.setSourceTable(usingTable); 543 } 544 545 timeNameResolution += System.currentTimeMillis() - nameResStart; 546 547 // Explicit Enhancement Phase: Add collected columns to namespaces 548 long enhanceStart = System.currentTimeMillis(); 549 EnhancementResult enhanceResult = namespaceEnhancer.enhance(); 550 timeEnhancement += System.currentTimeMillis() - enhanceStart; 551 logInfo("Pass 1 enhancement: " + enhanceResult.getTotalAdded() + " columns added to namespaces"); 552 553 scopesBuilt = true; 554 logInfo("Pass 1 complete. 
Resolved " + allColumnReferences.size() + " column references."); 555 556 557 } else { 558 // ========== PASS 2+: Explicit Enhancement + Re-resolve ========== 559 logInfo("Pass " + passNumber + ": Explicit namespace enhancement and re-resolution"); 560 561 // ======== Phase A: Start New Pass ======== 562 namespaceEnhancer.startPass(passNumber); 563 564 // ======== Phase B: Clear Resolution Results (keep scopes!) ======== 565 logInfo("Phase B: Clearing resolution results (scopes preserved)"); 566 resolutionContext.clear(); 567 568 // ======== Phase C: Re-resolve with Enhanced Namespaces ======== 569 logInfo("Phase C: Re-resolving with enhanced namespaces"); 570 571 // Get SET clause target columns that should not be re-resolved 572 Set<TObjectName> setClauseTargetColumns = scopeBuilder.getSetClauseTargetColumns(); 573 574 // Get INSERT ALL target columns that should not be re-resolved 575 Set<TObjectName> insertAllTargetColumns = scopeBuilder.getInsertAllTargetColumns(); 576 577 // Get MERGE INSERT VALUES columns that need sourceTable restoration after resolution 578 Map<TObjectName, TTable> mergeInsertValuesColumns = scopeBuilder.getMergeInsertValuesColumns(); 579 580 // Re-resolve all column references using their original scopes 581 // Scopes are reused from Pass 1, but namespaces may have been enhanced 582 for (TObjectName objName : allColumnReferences) { 583 // Skip SET clause target columns - they already have sourceTable correctly set 584 // to the UPDATE target table and should NOT be resolved through star columns 585 if (setClauseTargetColumns.contains(objName)) { 586 continue; 587 } 588 589 // Skip INSERT ALL target columns - they already have sourceTable correctly set 590 // to the INSERT target table and should NOT be resolved against the subquery scope 591 if (insertAllTargetColumns.contains(objName)) { 592 continue; 593 } 594 595 IScope scope = columnToScopeMap.get(objName); 596 if (scope != null) { 597 nameResolver.resolve(objName, scope); 598 599 // 
Handle USING column priority for JOIN...USING syntax 600 handleUsingColumnResolution(objName); 601 602 // Handle Teradata NAMED alias resolution 603 handleTeradataNamedAliasResolution(objName); 604 handleQualifyClauseAliasResolution(objName); 605 606 // Handle subquery aliased/calculated column resolution 607 // Ensures aliased columns don't incorrectly trace to base tables 608 handleSubqueryAliasedColumnResolution(objName); 609 610 // Collect for next enhancement pass if still targets star namespace 611 collectForEnhancementIfNeeded(objName, scope); 612 } 613 } 614 615 // Restore sourceTable for MERGE INSERT VALUES columns after re-resolution 616 for (Map.Entry<TObjectName, TTable> entry : mergeInsertValuesColumns.entrySet()) { 617 ResolutionResult res = entry.getKey().getResolution(); 618 if (res != null && res.isAmbiguous()) { 619 entry.getKey().setResolution(null); 620 } 621 entry.getKey().setSourceTable(entry.getValue()); 622 } 623 624 // ======== Phase D: Explicit Namespace Enhancement ======== 625 logInfo("Phase D: Explicit namespace enhancement"); 626 EnhancementResult enhanceResult = namespaceEnhancer.enhance(); 627 logInfo("Pass " + passNumber + " enhancement: " + 628 enhanceResult.getTotalAdded() + " columns added, " + 629 enhanceResult.getTotalSkipped() + " skipped (existing)"); 630 631 // Legacy support: also run old evidence collection (if needed) 632 if (config.isEvidenceCollectionEnabled()) { 633 runLegacyEvidenceCollection(); 634 } 635 } 636 637 // Get statistics after this pass 638 ResolutionStatistics currentStats = getStatistics(); 639 pass.complete(currentStats); 640 641 // Record this pass 642 convergenceDetector.recordPass(pass); 643 passHistory.add(pass); 644 645 logInfo(pass.getSummary()); 646 647 // Check convergence 648 ConvergenceDetector.ConvergenceResult convergence = convergenceDetector.checkConvergence(); 649 if (convergence.hasConverged()) { 650 logInfo("Convergence detected: " + convergence.getReason()); 651 
pass.setStopReason(convergence.getReason()); 652 continueIterating = false; 653 } else { 654 // Prepare for next pass 655 previousStats = currentStats; 656 passNumber++; 657 } 658 } 659 660 // Create cloned columns for star column tracing 661 // This is a CORE part of TSQLResolver2 - when a column traces through a CTE/subquery 662 // with SELECT * to a physical table, we create a cloned TObjectName with sourceTable 663 // pointing to the traced physical table. This ensures complete lineage tracking. 664 createTracedColumnClones(); 665 666 // Sync to legacy structures if enabled 667 if (config.isLegacyCompatibilityEnabled()) { 668 long syncStart = System.currentTimeMillis(); 669 syncToLegacyStructures(); 670 timeLegacySync += System.currentTimeMillis() - syncStart; 671 } 672 673 // Print final statistics 674 logInfo("Iterative resolution complete after " + passHistory.size() + " passes"); 675 ResolutionStatistics finalStats = getStatistics(); 676 logInfo("Final statistics: " + finalStats); 677 678 // Print namespace enhancement summary if in debug mode 679 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 680 logInfo("=== Namespace Enhancement Summary ==="); 681 logInfo("Total columns added: " + namespaceEnhancer.getTotalColumnsAdded()); 682 } 683 684 // Print performance timing breakdown 685 logInfo(getPerformanceTimings()); 686 687 // Accumulate to global timings for profiling 688 globalTimeScopeBuilder += timeScopeBuilder; 689 globalTimeNameResolution += timeNameResolution; 690 globalTimeEnhancement += timeEnhancement; 691 globalTimeLegacySync += timeLegacySync; 692 globalTimeOther += timeOther; 693 globalResolveCount++; 694 695 return true; 696 } 697 698 /** 699 * Run legacy evidence collection (deprecated). 700 * Kept for backward compatibility. 
701 */ 702 @SuppressWarnings("deprecation") 703 private void runLegacyEvidenceCollection() { 704 logInfo("Running legacy evidence collection (deprecated)"); 705 706 gudusoft.gsqlparser.resolver2.inference.EvidenceCollector evidenceCollector = 707 new gudusoft.gsqlparser.resolver2.inference.EvidenceCollector(); 708 709 int evidenceCount = 0; 710 for (int i = 0; i < sqlStatements.size(); i++) { 711 Object stmt = sqlStatements.get(i); 712 if (stmt instanceof TSelectSqlStatement) { 713 List<gudusoft.gsqlparser.resolver2.inference.InferenceEvidence> stmtEvidence = 714 evidenceCollector.collectFromSelect((TSelectSqlStatement) stmt); 715 evidenceCount += stmtEvidence.size(); 716 } 717 } 718 719 logInfo("Legacy evidence collection: " + evidenceCount + " items"); 720 } 721 722 /** 723 * Get the namespace enhancer for external access to enhancement history. 724 * 725 * @return the namespace enhancer 726 */ 727 public NamespaceEnhancer getNamespaceEnhancer() { 728 return namespaceEnhancer; 729 } 730 731 /** 732 * Get a detailed enhancement report. 733 * 734 * @return detailed report string 735 */ 736 public String getEnhancementReport() { 737 return namespaceEnhancer.generateReport(); 738 } 739 740 /** 741 * Re-process a statement for name resolution only (without rebuilding scopes). 742 * This is used in Pass 2+ to re-resolve names using enhanced scopes. 
743 * 744 * CRITICAL (Principle 1: Scope完全复用): 745 * - Scope tree is built ONCE in Pass 1 and completely reused in Pass 2+ 746 * - This method MUST NOT call processStatement() which rebuilds scopes 747 * - Instead, it iterates through allColumnReferences and re-resolves each 748 * column using its original scope from columnToScopeMap 749 * 750 * This allows: 751 * - Namespaces to be enhanced across iterations (Principle 2) 752 * - Star columns to benefit from reverse inference (Principle 3) 753 * - All previous inference results to be preserved 754 */ 755 private void reprocessStatementNamesOnly(Object statement) { 756 logDebug("Re-resolving column references without rebuilding scopes"); 757 758 // Re-resolve all column references using their original scopes 759 // The scopes are reused from Pass 1, but their namespaces may have been enhanced 760 for (TObjectName objName : allColumnReferences) { 761 IScope scope = columnToScopeMap.get(objName); 762 if (scope != null) { 763 // Re-resolve this column using the (potentially enhanced) scope 764 nameResolver.resolve(objName, scope); 765 766 // Handle USING column priority for JOIN...USING syntax 767 handleUsingColumnResolution(objName); 768 769 // Handle Teradata NAMED alias resolution 770 handleTeradataNamedAliasResolution(objName); 771 handleQualifyClauseAliasResolution(objName); 772 773 // Collect for next enhancement pass if still unresolved 774 collectForEnhancementIfNeeded(objName, scope); 775 } else { 776 logError("No scope found for column: " + objName); 777 } 778 } 779 } 780 781 /** 782 * Handle special resolution for USING columns in JOIN...USING syntax. 783 * In "a JOIN table2 USING (id)", the USING column exists in BOTH tables. 
784 * - The synthetic column (clone) resolves to the right-side table (table2) 785 * - The original USING column resolves to the left-side table (a) 786 * 787 * @param objName The column reference 788 */ 789 private void handleUsingColumnResolution(TObjectName objName) { 790 if (objName == null || scopeBuildResult == null) return; 791 792 // Check if this is a synthetic USING column (should resolve to right table) 793 TTable rightTable = scopeBuildResult.getUsingColumnRightTable(objName); 794 if (rightTable != null) { 795 // This is the synthetic USING column - set its sourceTable to the right-side table 796 objName.setSourceTable(rightTable); 797 798 // Create a proper resolution with the right-side table 799 gudusoft.gsqlparser.resolver2.model.ColumnSource source = 800 new gudusoft.gsqlparser.resolver2.model.ColumnSource( 801 null, // no namespace for USING columns 802 objName.getColumnNameOnly(), 803 null, // no definition node 804 1.0, // high confidence 805 "using_column_right", 806 rightTable // override table - the right-side table of the JOIN 807 ); 808 gudusoft.gsqlparser.resolver2.model.ResolutionResult result = 809 gudusoft.gsqlparser.resolver2.model.ResolutionResult.exactMatch(source); 810 811 // Update the TObjectName's resolution so formatter uses correct finalTable 812 objName.setResolution(result); 813 814 // Also register in ResolutionContext so getReferencesTo(table) can find it 815 resolutionContext.registerResolution(objName, result); 816 817 logDebug("USING column " + objName.getColumnNameOnly() + 818 " -> right-side table " + rightTable.getName()); 819 return; 820 } 821 822 // Check if this is the original USING column (should resolve to left table) 823 TTable leftTable = scopeBuildResult.getUsingColumnLeftTable(objName); 824 if (leftTable != null) { 825 // This is the original USING column - set its sourceTable to the left-side table 826 objName.setSourceTable(leftTable); 827 828 // Create a proper resolution with the left-side table 829 
gudusoft.gsqlparser.resolver2.model.ColumnSource source = 830 new gudusoft.gsqlparser.resolver2.model.ColumnSource( 831 null, // no namespace for USING columns 832 objName.getColumnNameOnly(), 833 null, // no definition node 834 1.0, // high confidence 835 "using_column_left", 836 leftTable // override table - the left-side table of the JOIN 837 ); 838 gudusoft.gsqlparser.resolver2.model.ResolutionResult result = 839 gudusoft.gsqlparser.resolver2.model.ResolutionResult.exactMatch(source); 840 841 // Update the TObjectName's resolution so formatter uses correct finalTable 842 objName.setResolution(result); 843 844 // Also register in ResolutionContext so getReferencesTo(table) can find it 845 resolutionContext.registerResolution(objName, result); 846 847 logDebug("USING column " + objName.getColumnNameOnly() + 848 " -> left-side table " + leftTable.getName()); 849 } 850 } 851 852 /** 853 * Handle Teradata NAMED alias resolution. 854 * 855 * <p>In Teradata, NAMED aliases defined in the SELECT list (using the {@code (NAMED alias)} syntax) 856 * can be referenced in the WHERE and QUALIFY clauses of the same SELECT statement. This is different 857 * from standard SQL where column aliases are only visible in ORDER BY.</p> 858 * 859 * <p>This method checks if a resolved column matches a NAMED alias from the enclosing SELECT list. 
860 * If it does, the resolution is updated to indicate this is a calculated column (alias), not a 861 * physical column from the table.</p> 862 * 863 * <p>Example:</p> 864 * <pre> 865 * SELECT USI_ID, SUBS_ID, 866 * (CAST(:param AS TIMESTAMP(0)))(NAMED REPORT_DTTM) 867 * FROM PRD2_ODW.SUBS_USI_HISTORY 868 * WHERE stime <= REPORT_DTTM AND etime > REPORT_DTTM 869 * </pre> 870 * <p>Here, REPORT_DTTM references in WHERE should NOT be linked to PRD2_ODW.SUBS_USI_HISTORY 871 * because REPORT_DTTM is a NAMED alias, not a physical column.</p> 872 * 873 * @param objName The column reference to check 874 */ 875 private void handleTeradataNamedAliasResolution(TObjectName objName) { 876 if (objName == null || sqlStatements == null || sqlStatements.size() == 0) return; 877 878 // Only applies to Teradata 879 EDbVendor dbVendor = sqlStatements.get(0).dbvendor; 880 if (dbVendor != EDbVendor.dbvteradata) return; 881 882 String columnName = objName.getColumnNameOnly(); 883 if (columnName == null || columnName.isEmpty()) return; 884 885 // Only apply to UNQUALIFIED column references (no table prefix) 886 // If a column has a table qualifier like "CP.CALC_PLATFORM_ID", it's clearly 887 // referencing a specific table's column, not a NAMED alias 888 if (objName.getTableToken() != null) return; 889 890 // Get the scope for this column reference 891 IScope scope = columnToScopeMap.get(objName); 892 if (scope == null) return; 893 894 // Find the enclosing SELECT statement from the scope 895 TSelectSqlStatement enclosingSelect = findEnclosingSelectFromScope(scope); 896 if (enclosingSelect == null) return; 897 898 // Optimization C: Use cached index for O(1) lookup instead of O(N) iteration 899 Map<String, TResultColumn> aliasIndex = getTeradataNamedAliasIndex(enclosingSelect); 900 if (aliasIndex == null || aliasIndex.isEmpty()) return; 901 902 // Look up the result column by alias name (case-insensitive, stored as lowercase) 903 TResultColumn resultCol = 
aliasIndex.get(columnName.toLowerCase()); 904 if (resultCol == null) return; 905 906 // Skip if objName is part of this result column's expression 907 // This handles cases like "CAST(ID AS DECIMAL) AS ID" where the ID inside 908 // CAST is the source column, not a reference to the ID alias 909 if (isColumnWithinResultColumn(objName, resultCol)) { 910 return; 911 } 912 913 // Found a matching NAMED alias 914 // Clear the source table since this is an alias, not a physical column 915 objName.setSourceTable(null); 916 917 // Create a new ColumnSource with the TResultColumn as the definition node 918 // This will make isCalculatedColumn() return true 919 ColumnSource source = new ColumnSource( 920 null, // namespace - not from a table 921 columnName, 922 resultCol, // definition node - the TResultColumn with the alias 923 1.0, // high confidence 924 "teradata_named_alias" 925 ); 926 ResolutionResult result = ResolutionResult.exactMatch(source); 927 objName.setResolution(result); 928 resolutionContext.registerResolution(objName, result); 929 930 logDebug("Teradata NAMED alias: " + columnName + " -> alias from SELECT list"); 931 } 932 933 /** 934 * Handle QUALIFY clause alias resolution for Snowflake, BigQuery, and Databricks. 935 * 936 * <p>In Snowflake, BigQuery, and Databricks, column aliases defined in the SELECT list 937 * can be referenced in the QUALIFY clause. This is different from standard SQL where 938 * column aliases are only visible in ORDER BY.</p> 939 * 940 * <p>This method checks if a column reference in the QUALIFY clause matches an alias 941 * from the enclosing SELECT list. 
If it does, the resolution is updated to indicate 942 * this is a calculated column (alias), not a physical column from the table.</p> 943 * 944 * <p>Example:</p> 945 * <pre> 946 * SELECT RoomNumber, RoomType, BlockFloor, 947 * ROW_NUMBER() OVER (PARTITION BY RoomType ORDER BY BlockFloor) AS row_num 948 * FROM Hospital.Room 949 * QUALIFY row_num = 1 950 * </pre> 951 * <p>Here, row_num in QUALIFY should NOT be linked to Hospital.Room because 952 * row_num is an alias for the window function, not a physical column.</p> 953 * 954 * @param objName The column reference to check 955 */ 956 private void handleQualifyClauseAliasResolution(TObjectName objName) { 957 if (objName == null || sqlStatements == null || sqlStatements.size() == 0) return; 958 959 // Only applies to databases that support QUALIFY with alias visibility 960 EDbVendor dbVendor = sqlStatements.get(0).dbvendor; 961 if (dbVendor != EDbVendor.dbvsnowflake && 962 dbVendor != EDbVendor.dbvbigquery && 963 dbVendor != EDbVendor.dbvdatabricks) return; 964 965 String columnName = objName.getColumnNameOnly(); 966 if (columnName == null || columnName.isEmpty()) return; 967 968 // Only apply to UNQUALIFIED column references (no table prefix) 969 if (objName.getTableToken() != null) return; 970 971 // Check if this column is within a QUALIFY clause 972 if (!isInQualifyClause(objName)) return; 973 974 // Get the scope for this column reference 975 IScope scope = columnToScopeMap.get(objName); 976 if (scope == null) return; 977 978 // Find the enclosing SELECT statement from the scope 979 TSelectSqlStatement enclosingSelect = findEnclosingSelectFromScope(scope); 980 if (enclosingSelect == null) return; 981 982 // Look for a matching alias in the SELECT list 983 TResultColumnList resultColumns = enclosingSelect.getResultColumnList(); 984 if (resultColumns == null || resultColumns.size() == 0) return; 985 986 TResultColumn matchingResultCol = null; 987 for (int i = 0; i < resultColumns.size(); i++) { 988 TResultColumn 
resultCol = resultColumns.getResultColumn(i); 989 if (resultCol == null) continue; 990 991 // Check if this result column has an alias matching the column name 992 if (resultCol.getAliasClause() != null && 993 resultCol.getAliasClause().getAliasName() != null) { 994 String aliasName = resultCol.getAliasClause().getAliasName().toString(); 995 if (aliasName != null && aliasName.equalsIgnoreCase(columnName)) { 996 matchingResultCol = resultCol; 997 break; 998 } 999 } 1000 } 1001 1002 if (matchingResultCol == null) return; 1003 1004 // Found a matching alias - clear the source table since this is an alias, not a physical column 1005 objName.setSourceTable(null); 1006 1007 // Create a new ColumnSource with the TResultColumn as the definition node 1008 // This will make isCalculatedColumn() return true 1009 ColumnSource source = new ColumnSource( 1010 null, // namespace - not from a table 1011 columnName, 1012 matchingResultCol, // definition node - the TResultColumn with the alias 1013 1.0, // high confidence 1014 "qualify_clause_alias" 1015 ); 1016 ResolutionResult result = ResolutionResult.exactMatch(source); 1017 objName.setResolution(result); 1018 resolutionContext.registerResolution(objName, result); 1019 1020 logDebug("QUALIFY clause alias: " + columnName + " -> alias from SELECT list"); 1021 } 1022 1023 /** 1024 * Check if a column reference is within a QUALIFY clause. 
1025 * 1026 * @param objName The column reference to check 1027 * @return true if the column is within a QUALIFY clause 1028 */ 1029 private boolean isInQualifyClause(TObjectName objName) { 1030 if (objName == null) return false; 1031 1032 // Get the column's scope to find the enclosing SELECT statement 1033 IScope scope = columnToScopeMap.get(objName); 1034 if (scope == null) return false; 1035 1036 TSelectSqlStatement enclosingSelect = findEnclosingSelectFromScope(scope); 1037 if (enclosingSelect == null) return false; 1038 1039 // Check if this SELECT has a QUALIFY clause 1040 TQualifyClause qualifyClause = enclosingSelect.getQualifyClause(); 1041 if (qualifyClause == null) return false; 1042 1043 // Check if the column's token position is within the QUALIFY clause's range 1044 if (objName.getStartToken() != null && qualifyClause.getStartToken() != null && 1045 qualifyClause.getEndToken() != null) { 1046 long objPos = objName.getStartToken().posinlist; 1047 long qualifyStart = qualifyClause.getStartToken().posinlist; 1048 long qualifyEnd = qualifyClause.getEndToken().posinlist; 1049 1050 return objPos >= qualifyStart && objPos <= qualifyEnd; 1051 } 1052 1053 return false; 1054 } 1055 1056 /** 1057 * Gets or builds the Teradata NAMED alias index for a SELECT statement. 1058 * Optimization C: Caches the alias map for O(1) lookup instead of O(N) iteration. 
1059 * 1060 * @param selectStmt The SELECT statement to get/build the index for 1061 * @return Map from lowercase alias name to TResultColumn, or null if no aliases 1062 */ 1063 private Map<String, TResultColumn> getTeradataNamedAliasIndex(TSelectSqlStatement selectStmt) { 1064 if (selectStmt == null) return null; 1065 1066 // Check cache first 1067 Map<String, TResultColumn> index = teradataNamedAliasCache.get(selectStmt); 1068 if (index != null) { 1069 return index; 1070 } 1071 1072 // Build index for this SELECT statement 1073 TResultColumnList resultColumns = selectStmt.getResultColumnList(); 1074 if (resultColumns == null || resultColumns.size() == 0) { 1075 // Cache empty map to avoid rebuilding 1076 index = java.util.Collections.emptyMap(); 1077 teradataNamedAliasCache.put(selectStmt, index); 1078 return index; 1079 } 1080 1081 index = new java.util.HashMap<>(); 1082 for (int i = 0; i < resultColumns.size(); i++) { 1083 TResultColumn resultCol = resultColumns.getResultColumn(i); 1084 if (resultCol == null) continue; 1085 1086 // Check if this result column has a NAMED alias 1087 if (resultCol.getAliasClause() != null && 1088 resultCol.getAliasClause().getAliasName() != null) { 1089 String aliasName = resultCol.getAliasClause().getAliasName().toString(); 1090 if (aliasName != null && !aliasName.isEmpty()) { 1091 // Store with lowercase key for case-insensitive matching 1092 index.put(aliasName.toLowerCase(), resultCol); 1093 } 1094 } 1095 } 1096 1097 // Cache the index (even if empty, to avoid rebuilding) 1098 teradataNamedAliasCache.put(selectStmt, index); 1099 1100 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && !index.isEmpty()) { 1101 logDebug("Built Teradata NAMED alias index for SELECT with " + index.size() + " aliases"); 1102 } 1103 1104 return index; 1105 } 1106 1107 /** 1108 * Check if a column reference (TObjectName) is within a result column's expression. 
1109 * This is used to prevent treating source columns in expressions like "CAST(ID AS DECIMAL) AS ID" 1110 * as references to the alias. 1111 * 1112 * @param objName The column reference to check 1113 * @param resultCol The result column to check against 1114 * @return true if objName is within resultCol's expression tree 1115 */ 1116 private boolean isColumnWithinResultColumn(TObjectName objName, TResultColumn resultCol) { 1117 if (objName == null || resultCol == null) return false; 1118 1119 // Get the expression of the result column 1120 TExpression expr = resultCol.getExpr(); 1121 if (expr == null) return false; 1122 1123 // Check by comparing start/end positions 1124 // If objName's position is within resultCol's expression, it's part of it 1125 long objStart = objName.getStartToken() != null ? objName.getStartToken().posinlist : -1; 1126 long objEnd = objName.getEndToken() != null ? objName.getEndToken().posinlist : -1; 1127 long exprStart = expr.getStartToken() != null ? expr.getStartToken().posinlist : -1; 1128 long exprEnd = expr.getEndToken() != null ? expr.getEndToken().posinlist : -1; 1129 1130 if (objStart >= 0 && exprStart >= 0 && objEnd >= 0 && exprEnd >= 0) { 1131 return objStart >= exprStart && objEnd <= exprEnd; 1132 } 1133 1134 return false; 1135 } 1136 1137 /** 1138 * Handle subquery aliased/calculated column resolution. 1139 * 1140 * <p>When a column reference resolves through a subquery (or CTE containing subqueries), 1141 * and the underlying column is an alias or calculated expression, we should NOT trace 1142 * it to the base table. This method ensures that such columns have their sourceTable 1143 * cleared to prevent incorrect attribution.</p> 1144 * 1145 * <p>This is essential for queries like:</p> 1146 * <pre> 1147 * WITH DataCTE AS ( 1148 * SELECT t.col, COUNT(*) AS cnt FROM table1 t ... 
1149 * ) 1150 * SELECT * FROM DataCTE 1151 * </pre> 1152 * <p>The 'cnt' column should NOT be traced to 'table1' because it's a calculated column.</p> 1153 * 1154 * @param objName The column reference to check 1155 */ 1156 private void handleSubqueryAliasedColumnResolution(TObjectName objName) { 1157 if (objName == null) return; 1158 1159 // Check if column has a table qualifier pointing to a subquery/CTE 1160 // If so, we should KEEP the sourceTable link for lineage tracing 1161 // The qualifier explicitly tells us which subquery the column belongs to 1162 String tableQualifier = objName.getTableString(); 1163 if (tableQualifier != null && !tableQualifier.isEmpty()) { 1164 IScope scope = columnToScopeMap.get(objName); 1165 if (scope != null) { 1166 TTable qualifiedTable = findTableByQualifier(scope, tableQualifier); 1167 if (qualifiedTable != null && 1168 (qualifiedTable.getSubquery() != null || qualifiedTable.getCTE() != null)) { 1169 // Column has qualifier pointing to a subquery/CTE 1170 // Keep the sourceTable link for lineage tracing (e.g., a.num_emp -> subquery a) 1171 // Don't clear sourceTable - this link is correct and needed 1172 logDebug("Subquery/CTE qualified column: " + objName.toString() + 1173 " - keeping sourceTable link to " + tableQualifier); 1174 return; 1175 } 1176 } 1177 } 1178 1179 // For unqualified columns (or columns qualified with base tables), 1180 // check if this is a calculated column or alias that should not trace to base tables 1181 ColumnSource source = objName.getColumnSource(); 1182 if (source != null) { 1183 if (source.isCalculatedColumn() || source.isColumnAlias()) { 1184 TTable currentSource = objName.getSourceTable(); 1185 if (currentSource != null) { 1186 // Only clear if sourceTable is a base table (not subquery/CTE) 1187 // For subquery/CTE references, keep the link for lineage tracing 1188 if (currentSource.getSubquery() == null && currentSource.getCTE() == null) { 1189 objName.setSourceTable(null); 1190 
logDebug("Calculated/alias column: " + objName.getColumnNameOnly() + 1191 " cleared sourceTable (was " + currentSource.getName() + ") - not linked to base table"); 1192 } 1193 } 1194 } 1195 } 1196 } 1197 1198 /** 1199 * Gets or builds the FromScopeIndex for a scope (Performance Optimization B). 1200 * 1201 * <p>This method implements lazy initialization: the index is built on first access 1202 * and cached for subsequent lookups within the same resolution pass.</p> 1203 * 1204 * @param scope The scope to get the index for (SelectScope, UpdateScope, or FromScope) 1205 * @return The cached or newly built FromScopeIndex, or null if scope has no FROM clause 1206 */ 1207 private FromScopeIndex getFromScopeIndex(IScope scope) { 1208 if (scope == null) { 1209 return null; 1210 } 1211 1212 // Get the actual FromScope to use as cache key 1213 IScope fromScope = null; 1214 if (scope instanceof SelectScope) { 1215 fromScope = ((SelectScope) scope).getFromScope(); 1216 } else if (scope instanceof gudusoft.gsqlparser.resolver2.scope.UpdateScope) { 1217 fromScope = ((gudusoft.gsqlparser.resolver2.scope.UpdateScope) scope).getFromScope(); 1218 } else if (scope instanceof FromScope) { 1219 fromScope = scope; 1220 } 1221 1222 if (fromScope == null) { 1223 return null; 1224 } 1225 1226 // Check cache first (lazy initialization) 1227 FromScopeIndex index = fromScopeIndexCache.get(fromScope); 1228 if (index == null) { 1229 // Build index and cache it 1230 index = new FromScopeIndex(fromScope.getChildren()); 1231 fromScopeIndexCache.put(fromScope, index); 1232 1233 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 1234 logDebug("Built FromScopeIndex for scope: " + index); 1235 } 1236 } 1237 1238 return index; 1239 } 1240 1241 /** 1242 * Find a table by its qualifier (alias or name) in the scope. 1243 * Uses FromScopeIndex for O(1) lookup instead of O(N) linear scan. 
1244 */ 1245 private TTable findTableByQualifier(IScope scope, String qualifier) { 1246 if (scope == null || qualifier == null) return null; 1247 1248 // Use indexed lookup (Performance Optimization B) 1249 FromScopeIndex index = getFromScopeIndex(scope); 1250 if (index != null) { 1251 return index.findTableByQualifier(qualifier); 1252 } 1253 1254 return null; 1255 } 1256 1257 /** 1258 * Check if a column name is an alias (not a passthrough column) in the subquery. 1259 */ 1260 private boolean isColumnAnAliasInSubquery(TSelectSqlStatement subquery, String columnName) { 1261 if (subquery == null || columnName == null) return false; 1262 1263 TResultColumnList resultCols = subquery.getResultColumnList(); 1264 if (resultCols == null) return false; 1265 1266 for (int i = 0; i < resultCols.size(); i++) { 1267 TResultColumn rc = resultCols.getResultColumn(i); 1268 if (rc == null) continue; 1269 1270 // Check if this result column has an alias matching the column name 1271 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1272 String alias = rc.getAliasClause().getAliasName().toString(); 1273 if (alias != null && alias.equalsIgnoreCase(columnName)) { 1274 // Found matching alias - check if it's a calculated column 1275 TExpression expr = rc.getExpr(); 1276 if (expr != null) { 1277 // Not a simple column reference = calculated 1278 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1279 return true; 1280 } 1281 } 1282 } 1283 } 1284 1285 // Also check for SQL Server proprietary alias syntax: alias = expr 1286 // In this case, the alias is the column name itself 1287 String colName = getResultColumnName(rc); 1288 if (colName != null && colName.equalsIgnoreCase(columnName)) { 1289 TExpression expr = rc.getExpr(); 1290 if (expr != null && expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1291 return true; 1292 } 1293 } 1294 } 1295 return false; 1296 } 1297 1298 /** 1299 * Get the column name from a result 
column (handles aliases and SQL Server proprietary syntax). 1300 */ 1301 private String getResultColumnName(TResultColumn rc) { 1302 if (rc == null) return null; 1303 1304 // Check for explicit alias 1305 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1306 return rc.getAliasClause().getAliasName().toString(); 1307 } 1308 1309 // Check for SQL Server proprietary alias: alias = expr 1310 // In this case, the expression itself contains the alias 1311 TExpression expr = rc.getExpr(); 1312 if (expr != null && expr.getExpressionType() == EExpressionType.assignment_t) { 1313 // The left side is the alias 1314 if (expr.getLeftOperand() != null && expr.getLeftOperand().getObjectOperand() != null) { 1315 return expr.getLeftOperand().getObjectOperand().toString(); 1316 } 1317 } 1318 1319 return null; 1320 } 1321 1322 /** 1323 * Find the enclosing SELECT statement from a scope. 1324 * Traverses up the scope hierarchy to find a SelectScope and gets its node. 1325 * 1326 * @param scope The scope to start from 1327 * @return The enclosing SELECT statement, or null if not found 1328 */ 1329 private TSelectSqlStatement findEnclosingSelectFromScope(IScope scope) { 1330 if (scope == null) return null; 1331 1332 IScope currentScope = scope; 1333 int maxIterations = 100; // Prevent infinite loops 1334 int iterations = 0; 1335 1336 while (currentScope != null && iterations < maxIterations) { 1337 iterations++; 1338 1339 // Check if current scope is a SelectScope 1340 if (currentScope instanceof SelectScope) { 1341 TParseTreeNode node = currentScope.getNode(); 1342 if (node instanceof TSelectSqlStatement) { 1343 return (TSelectSqlStatement) node; 1344 } 1345 } 1346 1347 // Move up to parent scope 1348 currentScope = currentScope.getParent(); 1349 } 1350 return null; 1351 } 1352 1353 /** 1354 * Collect a column reference for namespace enhancement if it targets a star namespace. 
1355 * This is called during resolution to gather columns that need to be added to namespaces. 1356 * 1357 * @param objName The column reference 1358 * @param scope The scope where the column should be resolved 1359 */ 1360 private void collectForEnhancementIfNeeded(TObjectName objName, IScope scope) { 1361 if (objName == null || scope == null) return; 1362 1363 String columnName = objName.getColumnNameOnly(); 1364 if (columnName == null || columnName.isEmpty()) return; 1365 1366 // Get the resolution result to check status 1367 gudusoft.gsqlparser.resolver2.model.ResolutionResult result = objName.getResolution(); 1368 1369 // Find candidate namespace from scope's FROM clause 1370 INamespace candidateNamespace = findCandidateNamespace(objName, scope); 1371 1372 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 1373 logInfo("[TSQLResolver2] collectForEnhancement: column=" + columnName + 1374 ", candidateNs=" + (candidateNamespace != null ? candidateNamespace.getDisplayName() : "null") + 1375 ", hasStar=" + (candidateNamespace != null ? candidateNamespace.hasStarColumn() : "N/A")); 1376 } 1377 1378 if (candidateNamespace != null) { 1379 // Determine confidence based on context 1380 double confidence = 0.7; // Default for unqualified reference 1381 String evidence = "outer_reference"; 1382 1383 // Higher confidence for qualified references (e.g., "a.column") 1384 if (objName.getTableToken() != null) { 1385 confidence = 0.9; 1386 evidence = "qualified_reference"; 1387 } 1388 1389 // Collect for enhancement 1390 namespaceEnhancer.collectColumnRef( 1391 columnName, 1392 candidateNamespace, 1393 objName, 1394 confidence, 1395 evidence 1396 ); 1397 } 1398 } 1399 1400 /** 1401 * Find the candidate namespace for a column reference. 1402 * Looks at the scope's FROM clause to find namespaces with star columns. 1403 * Uses FromScopeIndex for O(1) lookup instead of O(N) linear scan. 
1404 */ 1405 private INamespace findCandidateNamespace(TObjectName objName, IScope scope) { 1406 // Use indexed lookup (Performance Optimization B) 1407 FromScopeIndex index = getFromScopeIndex(scope); 1408 if (index == null) { 1409 return null; 1410 } 1411 1412 String tablePrefix = objName.getTableToken() != null ? 1413 objName.getTableToken().toString() : null; 1414 1415 return index.findCandidateNamespace(tablePrefix); 1416 } 1417 1418 /** 1419 * Delta 1: Collect metadata from DDL statements in the batch. 1420 * 1421 * If no SQLEnv is provided, this method extracts table/column metadata 1422 * from CREATE TABLE and CREATE VIEW statements in the SQL batch and 1423 * creates a TSQLEnv for use during resolution. 1424 * 1425 * This enables standalone resolution of SQL batches that contain both 1426 * DDL and DML without requiring external metadata. 1427 */ 1428 private void collectBatchMetadata() { 1429 if (sqlStatements == null || sqlStatements.size() == 0) { 1430 return; 1431 } 1432 1433 EDbVendor vendor = config != null ? config.getVendor() : EDbVendor.dbvmssql; 1434 BatchMetadataCollector collector = new BatchMetadataCollector(sqlStatements, vendor); 1435 TSQLEnv batchEnv = collector.collect(); 1436 1437 if (batchEnv != null) { 1438 setSqlEnv(batchEnv); 1439 logDebug("Collected batch-local DDL metadata into TSQLEnv"); 1440 } 1441 } 1442 1443 /** 1444 * Delta 4: Track database context from USE/SET statements. 1445 * 1446 * Scans the statement list for USE DATABASE, USE SCHEMA, SET SCHEMA, 1447 * and similar statements, and applies the context to TSQLEnv for 1448 * proper resolution of unqualified table names. 
1449 */ 1450 private void trackDatabaseContext() { 1451 if (sqlStatements == null || sqlStatements.size() == 0) { 1452 return; 1453 } 1454 1455 DatabaseContextTracker tracker = new DatabaseContextTracker(); 1456 tracker.processStatements(sqlStatements); 1457 1458 // Apply context to TSQLEnv if any context was found 1459 if (tracker.hasContext()) { 1460 TSQLEnv env = getSqlEnv(); 1461 if (env != null) { 1462 tracker.applyDefaults(env); 1463 logDebug("Applied database context: " + tracker); 1464 } else { 1465 // Create a minimal TSQLEnv if none exists 1466 EDbVendor vendor = config != null ? config.getVendor() : EDbVendor.dbvmssql; 1467 try { 1468 env = new TSQLEnv(vendor) { 1469 @Override 1470 public void initSQLEnv() { 1471 // Minimal initialization 1472 } 1473 }; 1474 tracker.applyDefaults(env); 1475 setSqlEnv(env); 1476 logDebug("Created minimal TSQLEnv with database context: " + tracker); 1477 } catch (Exception e) { 1478 // TSQLEnv creation failed - context will not be applied 1479 logDebug("Failed to create TSQLEnv for database context: " + e.getMessage()); 1480 } 1481 } 1482 } 1483 } 1484 1485 /** 1486 * Build the global scope 1487 */ 1488 private void buildGlobalScope() { 1489 logDebug("Building global scope"); 1490 1491 // Get SQLEnv and vendor for qualified name resolution 1492 TSQLEnv sqlEnv = globalContext != null ? 
globalContext.getSqlEnv() : null; 1493 EDbVendor vendor = EDbVendor.dbvoracle; // Default 1494 1495 // Try to get vendor from statements 1496 if (sqlStatements != null && sqlStatements.size() > 0) { 1497 vendor = sqlStatements.get(0).dbvendor; 1498 } 1499 1500 // Create global scope with sqlEnv and vendor for proper qualified name resolution 1501 globalScope = new GlobalScope(globalContext, config.getNameMatcher(), sqlEnv, vendor); 1502 1503 logDebug("GlobalScope created with defaults: catalog=" + 1504 globalScope.getDefaultCatalog() + ", schema=" + globalScope.getDefaultSchema()); 1505 } 1506 1507 /** 1508 * Process a single statement 1509 */ 1510 private void processStatement(Object statement) { 1511 if (statement instanceof TSelectSqlStatement) { 1512 processSelectStatement((TSelectSqlStatement) statement); 1513 } 1514 // TODO: Add support for INSERT, UPDATE, DELETE, etc. 1515 } 1516 1517 /** 1518 * Process a SELECT statement 1519 */ 1520 private void processSelectStatement(TSelectSqlStatement select) { 1521 processSelectStatement(select, globalScope); 1522 } 1523 1524 /** 1525 * Process a SELECT statement with a specific parent scope. 1526 * This is used for recursive processing of CTE subqueries. 
1527 */ 1528 private void processSelectStatement(TSelectSqlStatement select, IScope givenParentScope) { 1529 logDebug("Processing SELECT statement"); 1530 1531 // Create SELECT scope (will be child of CTE scope if CTEs exist, otherwise child of given parent scope) 1532 IScope parentScope = givenParentScope; 1533 1534 // Process CTEs (WITH clause) if present 1535 CTEScope cteScope = null; 1536 if (select.getCteList() != null && select.getCteList().size() > 0) { 1537 cteScope = processCTEs(select.getCteList(), givenParentScope); 1538 parentScope = cteScope; // CTEs become parent of SELECT 1539 } 1540 1541 SelectScope selectScope = new SelectScope(parentScope, select); 1542 1543 // Process FROM clause 1544 if (select.tables != null && select.tables.size() > 0) { 1545 FromScope fromScope = processFromClause(select, selectScope); 1546 selectScope.setFromScope(fromScope); 1547 } 1548 1549 // Process column references in SELECT list 1550 if (select.getResultColumnList() != null) { 1551 List<TObjectName> selectListColumns = collectObjectNamesFromResultColumns(select.getResultColumnList()); 1552 processColumnReferences(selectListColumns, selectScope); 1553 } 1554 1555 // Process WHERE clause 1556 if (select.getWhereClause() != null && 1557 select.getWhereClause().getCondition() != null) { 1558 List<TObjectName> whereColumns = select.getWhereClause().getCondition().getColumnsInsideExpression(); 1559 processColumnReferences(whereColumns, selectScope); 1560 } 1561 1562 // Process GROUP BY clause 1563 GroupByScope groupByScope = null; 1564 if (select.getGroupByClause() != null) { 1565 groupByScope = processGroupBy(select, selectScope); 1566 } 1567 1568 // Process HAVING clause 1569 if (select.getGroupByClause() != null && 1570 select.getGroupByClause().getHavingClause() != null) { 1571 processHaving(select, selectScope, groupByScope); 1572 } 1573 1574 // Process ORDER BY clause 1575 if (select.getOrderbyClause() != null) { 1576 processOrderBy(select, selectScope); 1577 } 1578 } 
1579 1580 /** 1581 * Process FROM clause and build FROM scope 1582 */ 1583 private FromScope processFromClause(TSelectSqlStatement select, IScope parentScope) { 1584 FromScope fromScope = new FromScope(parentScope, select.tables); 1585 1586 // Process each relation (table or join) 1587 ArrayList<TTable> relations = select.getRelations(); 1588 if (relations != null) { 1589 for (TTable table : relations) { 1590 processTableOrJoin(table, fromScope); 1591 } 1592 } 1593 1594 return fromScope; 1595 } 1596 1597 /** 1598 * Recursively process a table or join expression and add to FROM scope 1599 */ 1600 private void processTableOrJoin(TTable table, FromScope fromScope) { 1601 if (table.getTableType() == ETableSource.join) { 1602 // This is a JOIN - recursively process left and right tables 1603 TJoinExpr joinExpr = table.getJoinExpr(); 1604 if (joinExpr != null) { 1605 logDebug("Processing JOIN: " + joinExpr.getJointype()); 1606 1607 // Recursively process left table 1608 TTable leftTable = joinExpr.getLeftTable(); 1609 if (leftTable != null) { 1610 processTableOrJoin(leftTable, fromScope); 1611 } 1612 1613 // Recursively process right table 1614 TTable rightTable = joinExpr.getRightTable(); 1615 if (rightTable != null) { 1616 processTableOrJoin(rightTable, fromScope); 1617 } 1618 1619 // TODO: Create JoinScope to handle nullable semantics 1620 // For now, we just add the base tables to FROM scope 1621 } 1622 } else { 1623 // This is a base table (objectname, subquery, etc.) 1624 INamespace namespace = createNamespaceForTable(table); 1625 1626 // Validate namespace (load metadata) 1627 namespace.validate(); 1628 1629 // Determine alias 1630 String alias = table.getAliasName() != null 1631 ? 
table.getAliasName() 1632 : table.getName(); 1633 1634 // Add to FROM scope 1635 fromScope.addChild(namespace, alias, false); 1636 1637 logDebug("Added table to FROM scope: " + alias); 1638 } 1639 } 1640 1641 /** 1642 * Process CTEs (WITH clause) and build CTE scope 1643 */ 1644 private CTEScope processCTEs(TCTEList cteList, IScope parentScope) { 1645 CTEScope cteScope = new CTEScope(parentScope, cteList); 1646 logDebug("Processing WITH clause with " + cteList.size() + " CTE(s)"); 1647 1648 // Process each CTE in order (later CTEs can reference earlier ones) 1649 for (int i = 0; i < cteList.size(); i++) { 1650 TCTE cte = cteList.getCTE(i); 1651 1652 // Get CTE name 1653 String cteName = cte.getTableName() != null ? cte.getTableName().toString() : null; 1654 if (cteName == null) { 1655 logDebug("Skipping CTE with null name"); 1656 continue; 1657 } 1658 1659 // Get CTE subquery 1660 TSelectSqlStatement cteSubquery = cte.getSubquery(); 1661 if (cteSubquery == null) { 1662 logDebug("Skipping CTE '" + cteName + "' with null subquery"); 1663 continue; 1664 } 1665 1666 // Create CTENamespace 1667 CTENamespace cteNamespace = new CTENamespace( 1668 cte, 1669 cteName, 1670 cteSubquery, 1671 config.getNameMatcher() 1672 ); 1673 1674 // Validate namespace (load column metadata from subquery) 1675 cteNamespace.validate(); 1676 1677 // Add to CTE scope (makes it visible to later CTEs and main query) 1678 cteScope.addCTE(cteName, cteNamespace); 1679 1680 logDebug("Added CTE to scope: " + cteName + 1681 " (columns=" + cteNamespace.getExplicitColumns().size() + 1682 ", recursive=" + cteNamespace.isRecursive() + ")"); 1683 1684 // Recursively process CTE subquery 1685 // This ensures that: 1686 // 1. Columns within the CTE are properly resolved 1687 // 2. Nested CTEs within this CTE are handled 1688 // 3. 
Later CTEs can reference this CTE's columns 1689 logDebug("Recursively processing CTE subquery: " + cteName); 1690 processSelectStatement(cteSubquery, cteScope); 1691 } 1692 1693 return cteScope; 1694 } 1695 1696 /** 1697 * Process GROUP BY clause and build GROUP BY scope 1698 */ 1699 private GroupByScope processGroupBy(TSelectSqlStatement select, SelectScope selectScope) { 1700 GroupByScope groupByScope = new GroupByScope(selectScope, select.getGroupByClause()); 1701 logDebug("Processing GROUP BY clause"); 1702 1703 // Set the FROM scope for column resolution 1704 if (selectScope.getFromScope() != null) { 1705 groupByScope.setFromScope(selectScope.getFromScope()); 1706 } 1707 1708 // Process column references in GROUP BY items 1709 if (select.getGroupByClause().getItems() != null) { 1710 for (int i = 0; i < select.getGroupByClause().getItems().size(); i++) { 1711 gudusoft.gsqlparser.nodes.TGroupByItem item = select.getGroupByClause().getItems().getGroupByItem(i); 1712 if (item.getExpr() != null) { 1713 List<TObjectName> groupByColumns = item.getExpr().getColumnsInsideExpression(); 1714 processColumnReferences(groupByColumns, groupByScope); 1715 } 1716 } 1717 } 1718 1719 return groupByScope; 1720 } 1721 1722 /** 1723 * Process HAVING clause and build HAVING scope 1724 */ 1725 private void processHaving(TSelectSqlStatement select, SelectScope selectScope, GroupByScope groupByScope) { 1726 logDebug("Processing HAVING clause"); 1727 1728 HavingScope havingScope = new HavingScope( 1729 selectScope, 1730 select.getGroupByClause().getHavingClause() 1731 ); 1732 1733 // Set GROUP BY scope for grouped column resolution 1734 if (groupByScope != null) { 1735 havingScope.setGroupByScope(groupByScope); 1736 } 1737 1738 // Set SELECT scope for alias resolution 1739 havingScope.setSelectScope(selectScope); 1740 1741 // Process column references in HAVING condition 1742 List<TObjectName> havingColumns = select.getGroupByClause().getHavingClause().getColumnsInsideExpression(); 
1743 processColumnReferences(havingColumns, havingScope); 1744 } 1745 1746 /** 1747 * Process ORDER BY clause and build ORDER BY scope 1748 */ 1749 private void processOrderBy(TSelectSqlStatement select, SelectScope selectScope) { 1750 logDebug("Processing ORDER BY clause"); 1751 1752 OrderByScope orderByScope = new OrderByScope(selectScope, select.getOrderbyClause()); 1753 1754 // Set SELECT scope for alias resolution 1755 orderByScope.setSelectScope(selectScope); 1756 1757 // Set FROM scope for direct column resolution (database-dependent) 1758 if (selectScope.getFromScope() != null) { 1759 orderByScope.setFromScope(selectScope.getFromScope()); 1760 } 1761 1762 // Process column references in ORDER BY items 1763 if (select.getOrderbyClause().getItems() != null) { 1764 for (int i = 0; i < select.getOrderbyClause().getItems().size(); i++) { 1765 gudusoft.gsqlparser.nodes.TOrderByItem item = select.getOrderbyClause().getItems().getOrderByItem(i); 1766 if (item.getSortKey() != null) { 1767 List<TObjectName> orderByColumns = item.getSortKey().getColumnsInsideExpression(); 1768 processColumnReferences(orderByColumns, orderByScope); 1769 } 1770 } 1771 } 1772 } 1773 1774 /** 1775 * Create appropriate namespace for a table 1776 */ 1777 private INamespace createNamespaceForTable(TTable table) { 1778 // Check if it's a subquery 1779 if (table.getSubquery() != null) { 1780 return new SubqueryNamespace( 1781 table.getSubquery(), 1782 table.getAliasName(), 1783 config.getNameMatcher() 1784 ); 1785 } 1786 1787 // Regular table - pass sqlEnv and vendor for qualified name resolution 1788 TSQLEnv sqlEnv = globalContext != null ? globalContext.getSqlEnv() : null; 1789 EDbVendor vendor = table.dbvendor != null ? 
table.dbvendor : EDbVendor.dbvoracle; 1790 return new TableNamespace(table, config.getNameMatcher(), sqlEnv, vendor); 1791 } 1792 1793 /** 1794 * Collect all TObjectName from TResultColumnList 1795 */ 1796 private List<TObjectName> collectObjectNamesFromResultColumns( 1797 gudusoft.gsqlparser.nodes.TResultColumnList resultColumns) { 1798 List<TObjectName> objNames = new ArrayList<>(); 1799 1800 for (int i = 0; i < resultColumns.size(); i++) { 1801 gudusoft.gsqlparser.nodes.TResultColumn rc = resultColumns.getResultColumn(i); 1802 if (rc.getExpr() != null) { 1803 // Get all column references from the expression 1804 List<TObjectName> exprColumns = rc.getExpr().getColumnsInsideExpression(); 1805 if (exprColumns != null) { 1806 objNames.addAll(exprColumns); 1807 } 1808 } 1809 } 1810 1811 return objNames; 1812 } 1813 1814 /** 1815 * Process column references (TObjectName list) 1816 */ 1817 private void processColumnReferences(List<TObjectName> objectNames, IScope scope) { 1818 if (objectNames == null) return; 1819 1820 for (TObjectName objName : objectNames) { 1821 // Record column-to-scope mapping for iterative resolution (Principle 1) 1822 columnToScopeMap.put(objName, scope); 1823 allColumnReferences.add(objName); 1824 1825 // Resolve the column reference 1826 nameResolver.resolve(objName, scope); 1827 1828 // Handle USING column priority for JOIN...USING syntax 1829 handleUsingColumnResolution(objName); 1830 1831 // Handle Teradata NAMED alias resolution 1832 handleTeradataNamedAliasResolution(objName); 1833 handleQualifyClauseAliasResolution(objName); 1834 } 1835 } 1836 1837 // Detailed legacy sync timing (for profiling) 1838 private static long globalTimeClearLinked = 0; 1839 private static long globalTimeFillAttributes = 0; 1840 private static long globalTimeSyncColumns = 0; 1841 private static long globalTimePopulateOrphans = 0; 1842 private static long globalTimeClearHints = 0; 1843 1844 /** 1845 * Get detailed legacy sync timing breakdown. 
1846 */ 1847 public static String getLegacySyncTimings() { 1848 long total = globalTimeClearLinked + globalTimeFillAttributes + globalTimeSyncColumns + globalTimePopulateOrphans + globalTimeClearHints; 1849 return String.format( 1850 "LegacySync Breakdown:\n" + 1851 " ClearLinkedColumns: %d ms (%.1f%%)\n" + 1852 " FillTableAttributes: %d ms (%.1f%%)\n" + 1853 " SyncColumnToLegacy: %d ms (%.1f%%)\n" + 1854 " PopulateOrphanColumns: %d ms (%.1f%%)\n" + 1855 " ClearSyntaxHints: %d ms (%.1f%%)\n" + 1856 " Total: %d ms", 1857 globalTimeClearLinked, total > 0 ? 100.0 * globalTimeClearLinked / total : 0, 1858 globalTimeFillAttributes, total > 0 ? 100.0 * globalTimeFillAttributes / total : 0, 1859 globalTimeSyncColumns, total > 0 ? 100.0 * globalTimeSyncColumns / total : 0, 1860 globalTimePopulateOrphans, total > 0 ? 100.0 * globalTimePopulateOrphans / total : 0, 1861 globalTimeClearHints, total > 0 ? 100.0 * globalTimeClearHints / total : 0, 1862 total); 1863 } 1864 1865 /** 1866 * Create cloned columns for star column tracing. 1867 * 1868 * <p>This is a CORE part of TSQLResolver2's name resolution. When a column traces 1869 * through a CTE or subquery with SELECT * to a physical table, we create a cloned 1870 * TObjectName with sourceTable pointing to the traced physical table. 1871 * 1872 * <p>Example: 1873 * <pre> 1874 * WITH cte AS (SELECT * FROM physical_table) 1875 * SELECT a FROM cte 1876 * </pre> 1877 * 1878 * <p>For column 'a' in the outer SELECT: 1879 * <ul> 1880 * <li>Original column: sourceTable = cte (immediate source)</li> 1881 * <li>Cloned column: sourceTable = physical_table (traced through star)</li> 1882 * </ul> 1883 * 1884 * <p>Both columns are added to allColumnReferences for complete lineage tracking. 1885 * This ensures the formatter can output both the immediate source and the traced 1886 * physical table when needed. 
1887 */ 1888 private void createTracedColumnClones() { 1889 // Collect clones to add (avoid ConcurrentModificationException) 1890 java.util.List<TObjectName> clonesToAdd = new java.util.ArrayList<>(); 1891 1892 // Build HashSet of existing (sourceTable identity, columnNameLower) pairs for O(1) dedup 1893 java.util.Set<String> existingKeys = new java.util.HashSet<>(); 1894 for (TObjectName existing : allColumnReferences) { 1895 if (existing.getSourceTable() != null) { 1896 String existingColName = existing.getColumnNameOnly(); 1897 if (existingColName != null) { 1898 String key = System.identityHashCode(existing.getSourceTable()) 1899 + ":" + existingColName.toLowerCase(); 1900 existingKeys.add(key); 1901 } 1902 } 1903 } 1904 1905 for (TObjectName column : allColumnReferences) { 1906 // Skip star columns - they represent all columns from a table and shouldn't be cloned 1907 String colName = column.getColumnNameOnly(); 1908 if (colName != null && colName.equals("*")) { 1909 continue; 1910 } 1911 1912 // Skip columns without resolution 1913 gudusoft.gsqlparser.resolver2.model.ResolutionResult resolution = column.getResolution(); 1914 if (resolution == null || !resolution.isExactMatch()) { 1915 continue; 1916 } 1917 1918 gudusoft.gsqlparser.resolver2.model.ColumnSource source = resolution.getColumnSource(); 1919 if (source == null) { 1920 continue; 1921 } 1922 1923 TTable sourceTable = column.getSourceTable(); 1924 if (sourceTable == null) { 1925 continue; 1926 } 1927 1928 // Only process CTE or subquery columns 1929 if (!sourceTable.isCTEName() && sourceTable.getTableType() != ETableSource.subquery) { 1930 continue; 1931 } 1932 1933 // Get the traced physical table 1934 TTable finalTable = source.getFinalTable(); 1935 if (finalTable == null || finalTable == sourceTable) { 1936 continue; 1937 } 1938 1939 // Skip if finalTable is also a CTE or subquery 1940 if (finalTable.isCTEName() || finalTable.getTableType() == ETableSource.subquery) { 1941 continue; 1942 } 1943 
1944 // Skip subquery columns when the column matches an explicit column in the subquery's 1945 // SELECT list. Cloning is only needed when tracing through star columns. 1946 // For example, in "SELECT al1.COL1, al1.COL3 FROM (SELECT t1.COL1, t2.* FROM t1, t2) al1": 1947 // - al1.COL1 matches explicit "t1.COL1" -> don't clone (stays at subquery level) 1948 // - al1.COL3 doesn't match explicit column, must come from t2.* -> clone to t2 1949 if (sourceTable.getTableType() == ETableSource.subquery) { 1950 TSelectSqlStatement subquery = sourceTable.getSubquery(); 1951 if (subquery != null && subqueryHasExplicitColumn(subquery, colName)) { 1952 continue; 1953 } 1954 } 1955 1956 // Skip UNION scenarios - syncToLegacyStructures already handles linking to all 1957 // UNION branch tables via getAllFinalTables(). Creating clones would cause duplicates. 1958 java.util.List<TTable> allFinalTables = source.getAllFinalTables(); 1959 if (allFinalTables != null && allFinalTables.size() > 1) { 1960 continue; 1961 } 1962 1963 // Skip UNQUALIFIED join condition columns - they should not be traced to the source 1964 // subquery's underlying table via star column expansion. 1965 // This is particularly important for MERGE ON clause columns which may 1966 // belong to the target table rather than the source subquery. 1967 // QUALIFIED columns (like S.id) should still be traced as they explicitly reference 1968 // the source subquery. 1969 // Note: We check location only because ownStmt may be null for unresolved columns. 
1970 if (column.getLocation() == ESqlClause.joinCondition 1971 && (column.getTableString() == null || column.getTableString().isEmpty())) { 1972 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 1973 logInfo("createTracedColumnClones: Skipping unqualified join condition column " + column.toString() + 1974 " - should not be traced to subquery's underlying table"); 1975 } 1976 continue; 1977 } 1978 1979 // O(1) dedup check using HashSet instead of O(n) linear scan 1980 String dedupeKey = System.identityHashCode(finalTable) 1981 + ":" + (colName != null ? colName.toLowerCase() : ""); 1982 if (existingKeys.add(dedupeKey)) { 1983 // Clone the column and set sourceTable to the traced physical table 1984 TObjectName clonedColumn = column.clone(); 1985 clonedColumn.setSourceTable(finalTable); 1986 clonesToAdd.add(clonedColumn); 1987 1988 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 1989 logInfo("createTracedColumnClones: Cloned column " + column.toString() + 1990 " with sourceTable traced from " + sourceTable.getTableName() + 1991 " to physical table " + finalTable.getTableName()); 1992 } 1993 } 1994 } 1995 1996 // Add all clones to allColumnReferences (local copy in TSQLResolver2) 1997 allColumnReferences.addAll(clonesToAdd); 1998 1999 // Also add to scopeBuildResult so consumers using scopeBuildResult.getAllColumnReferences() 2000 // (like TestGetTableColumn2 for star column expansion tests) can see the clones 2001 if (scopeBuildResult != null && !clonesToAdd.isEmpty()) { 2002 scopeBuildResult.addColumnReferences(clonesToAdd); 2003 } 2004 2005 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && !clonesToAdd.isEmpty()) { 2006 logInfo("createTracedColumnClones: Created " + clonesToAdd.size() + " traced column clones"); 2007 } 2008 } 2009 2010 /** 2011 * Sync results to legacy structures for backward compatibility. 
2012 * This populates: 2013 * - TTable.linkedColumns: columns resolved to this table 2014 * - TObjectName.sourceTable: already set in setResolution() 2015 * - TObjectName.linkedColumnDef: from ColumnSource.definitionNode 2016 * - TObjectName.sourceColumn: from ColumnSource.definitionNode (if TResultColumn) 2017 */ 2018 private void syncToLegacyStructures() { 2019 if (!config.isLegacyCompatibilityEnabled()) { 2020 logInfo("Legacy compatibility disabled, skipping sync"); 2021 return; 2022 } 2023 2024 for (int i = 0; i < sqlStatements.size(); i++) { 2025 } 2026 2027 logInfo("Syncing to legacy structures..."); 2028 2029 long phaseStart; 2030 2031 // Clear existing linkedColumns on all tables 2032 phaseStart = System.currentTimeMillis(); 2033 clearAllLinkedColumns(); 2034 2035 // Clear existing orphanColumns on all statements 2036 // These will be repopulated in Phase 4b based on TSQLResolver2 resolution 2037 for (int i = 0; i < sqlStatements.size(); i++) { 2038 clearOrphanColumnsRecursive(sqlStatements.get(i)); 2039 } 2040 globalTimeClearLinked += System.currentTimeMillis() - phaseStart; 2041 2042 // Phase 1: Fill TTable.getAttributes() for all tables 2043 // This uses the namespace data already collected during resolution 2044 phaseStart = System.currentTimeMillis(); 2045 Set<TTable> processedTables = new HashSet<>(); 2046 for (int i = 0; i < sqlStatements.size(); i++) { 2047 fillTableAttributesRecursive(sqlStatements.get(i), processedTables); 2048 } 2049 globalTimeFillAttributes += System.currentTimeMillis() - phaseStart; 2050 logInfo("Filled attributes for " + processedTables.size() + " tables"); 2051 2052 // Phase 2: Iterate through all column references and sync to legacy structures 2053 phaseStart = System.currentTimeMillis(); 2054 int syncCount = 0; 2055 for (TObjectName column : allColumnReferences) { 2056 if (syncColumnToLegacy(column)) { 2057 syncCount++; 2058 } 2059 } 2060 globalTimeSyncColumns += System.currentTimeMillis() - phaseStart; 2061 2062 // Phase 
3: Link CTAS target table columns 2063 // For CREATE TABLE AS SELECT, the SELECT list columns should be linked to the target table 2064 for (int i = 0; i < sqlStatements.size(); i++) { 2065 linkCTASTargetTableColumns(sqlStatements.get(i)); 2066 } 2067 2068 // Phase 4: Sync implicit database/schema from USE DATABASE/USE SCHEMA to AST 2069 // This enables TObjectName.getAnsiSchemaName() and getAnsiCatalogName() to work correctly 2070 syncImplicitDbSchemaToAST(); 2071 2072 // Phase 4b: Populate orphan columns 2073 // Columns with sourceTable=null (unresolved or ambiguous) should be added to 2074 // their containing statement's orphanColumns list. This enables TGetTableColumn 2075 // to report them as orphan columns (with linkOrphanColumnToFirstTable option). 2076 phaseStart = System.currentTimeMillis(); 2077 populateOrphanColumns(); 2078 globalTimePopulateOrphans += System.currentTimeMillis() - phaseStart; 2079 2080 // Phase 4c: Expand star columns using push-down inferred columns 2081 // For SELECT * and SELECT table.*, expand to individual columns based on: 2082 // 1. Inferred columns from the namespace (via push-down algorithm) 2083 // 2. This enables star column expansion without TSQLEnv metadata 2084 phaseStart = System.currentTimeMillis(); 2085 expandStarColumnsUsingPushDown(); 2086 long expandTime = System.currentTimeMillis() - phaseStart; 2087 logInfo("Star column expansion took " + expandTime + "ms"); 2088 2089 // Phase 5: Clear orphan column syntax hints for resolved columns 2090 // The old resolver adds "sphint" (syntax hint) warnings for columns that can't be resolved. 2091 // TSQLResolver2 resolves these columns but doesn't clear the syntax hints. 2092 // This phase cleans up those hints to maintain compatibility with tests expecting no hints. 
2093 phaseStart = System.currentTimeMillis(); 2094 clearOrphanColumnSyntaxHints(); 2095 globalTimeClearHints += System.currentTimeMillis() - phaseStart; 2096 2097 logInfo("Legacy sync complete: " + syncCount + "/" + allColumnReferences.size() + " columns synced"); 2098 } 2099 2100 /** 2101 * Link SELECT list columns to CTAS target table. 2102 * For CREATE TABLE AS SELECT statements, the output column names (aliases) 2103 * should be linked to the target table. The source column references 2104 * remain linked to their source tables. 2105 * 2106 * NOTE: For CTAS, the parser (TCreateTableSqlStatement.doParseStatement) already 2107 * correctly creates and links alias columns to the target table. The source columns 2108 * that were incorrectly added are filtered out in clearLinkedColumnsRecursive(). 2109 * This method now only handles cases where the parser didn't create alias columns. 2110 */ 2111 private void linkCTASTargetTableColumns(TCustomSqlStatement stmt) { 2112 if (stmt == null) return; 2113 2114 // CTAS columns are already handled by the parser (TCreateTableSqlStatement.doParseStatement) 2115 // and incorrectly added source columns are filtered in clearLinkedColumnsRecursive(). 2116 // No additional processing needed here for CTAS. 2117 2118 // Process nested statements (for other statement types that might need CTAS handling) 2119 for (int i = 0; i < stmt.getStatements().size(); i++) { 2120 linkCTASTargetTableColumns(stmt.getStatements().get(i)); 2121 } 2122 } 2123 2124 /** 2125 * Populate orphanColumns for unresolved columns. 2126 * Columns with sourceTable=null should be added to their containing statement's orphanColumns. 2127 * This enables TGetTableColumn to report these as "missed" columns. 
2128 */ 2129 private void populateOrphanColumns() { 2130 int addedCount = 0; 2131 for (TObjectName column : allColumnReferences) { 2132 if (column == null) continue; 2133 2134 // Skip non-column types that should not be in orphan columns 2135 EDbObjectType dbObjectType = column.getDbObjectType(); 2136 if (dbObjectType == EDbObjectType.column_alias // alias clause column definitions (e.g., AS x (numbers, animals)) 2137 || dbObjectType == EDbObjectType.variable // stored procedure variables 2138 || dbObjectType == EDbObjectType.parameter // stored procedure parameters 2139 || dbObjectType == EDbObjectType.cursor // cursors 2140 || dbObjectType == EDbObjectType.constant // constants 2141 || dbObjectType == EDbObjectType.label // labels 2142 ) { 2143 continue; 2144 } 2145 2146 // Check resolution status directly - ambiguous columns should be added to orphanColumns 2147 // Note: column.getColumnSource() returns the first candidate for ambiguous columns, 2148 // which would cause them to be incorrectly skipped. We need to check the resolution status first. 2149 // IMPORTANT: This check must come BEFORE the sourceTable check because Phase 1 (linkColumnToTable) 2150 // might have already set sourceTable during parsing, but TSQLResolver2 correctly marked it as ambiguous. 
2151 // NOTE: Skip star columns (*) since they are handled specially via sourceTableList 2152 ResolutionResult resolution = column.getResolution(); 2153 String columnName = column.getColumnNameOnly(); 2154 boolean isStarColumn = columnName != null && columnName.equals("*"); 2155 2156 if (resolution != null && resolution.getStatus() == ResolutionStatus.AMBIGUOUS && !isStarColumn) { 2157 // Ambiguous columns should be added to orphanColumns so they appear as "missed" 2158 // Clear sourceTable if it was set by Phase 1 (linkColumnToTable) so the column 2159 // doesn't also appear as resolved in the output 2160 if (column.getSourceTable() != null) { 2161 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2162 logInfo("populateOrphanColumns: Clearing sourceTable for AMBIGUOUS column: " + column.toString() 2163 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2164 + " was linked to " + column.getSourceTable().getTableName() 2165 + " with " + (resolution.getAmbiguousSource() != null ? 2166 resolution.getAmbiguousSource().getCandidateCount() : 0) + " candidates"); 2167 } 2168 column.setSourceTable(null); 2169 } 2170 // Fall through to add to orphanColumns 2171 } else { 2172 // Star columns (*) should NEVER be orphan columns - they represent all columns 2173 // from all tables and are handled specially via sourceTableList and linked 2174 // to tables in syncColumnToLegacy() which runs after this phase. 
2175 if (isStarColumn) { 2176 continue; 2177 } 2178 2179 // For non-ambiguous columns, skip if they have a sourceTable 2180 if (column.getSourceTable() != null) { 2181 continue; 2182 } 2183 2184 // Also skip columns that have a ColumnSource with a valid table 2185 ColumnSource source = column.getColumnSource(); 2186 if (source != null) { 2187 TTable finalTable = source.getFinalTable(); 2188 if (finalTable != null) { 2189 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2190 logInfo("populateOrphanColumns: Skipping column with ColumnSource: " + column.toString() 2191 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2192 + " -> resolved to " + finalTable.getTableName()); 2193 } 2194 continue; 2195 } 2196 // Also check overrideTable for derived table columns 2197 TTable overrideTable = source.getOverrideTable(); 2198 if (overrideTable != null) { 2199 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2200 logInfo("populateOrphanColumns: Skipping column with ColumnSource (override): " + column.toString() 2201 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2202 + " -> resolved to " + overrideTable.getTableName()); 2203 } 2204 continue; 2205 } 2206 } 2207 } 2208 2209 // Debug: log columns being added to orphan 2210 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2211 ColumnSource debugSource = column.getColumnSource(); 2212 logInfo("populateOrphanColumns: Adding orphan column: " + column.toString() 2213 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2214 + ", hasColumnSource=" + (debugSource != null) 2215 + (debugSource != null ? ", namespace=" + (debugSource.getSourceNamespace() != null ? 
2216 debugSource.getSourceNamespace().getClass().getSimpleName() : "null") : "")); 2217 } 2218 2219 // Find the containing statement for this column 2220 TCustomSqlStatement containingStmt = findContainingStatement(column); 2221 if (containingStmt != null) { 2222 // Set ownStmt so TSQLResolver2ResultFormatter can use getOwnStmt().getFirstPhysicalTable() 2223 // to link orphan columns to the first physical table (matching TGetTableColumn behavior) 2224 column.setOwnStmt(containingStmt); 2225 2226 TObjectNameList orphanColumns = containingStmt.getOrphanColumns(); 2227 if (orphanColumns != null && !containsColumn(orphanColumns, column)) { 2228 orphanColumns.addObjectName(column); 2229 addedCount++; 2230 } 2231 } else { 2232 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2233 logInfo("Could not find containing statement for orphan column: " + column.toString()); 2234 } 2235 } 2236 } 2237 logInfo("Populated " + addedCount + " orphan columns"); 2238 } 2239 2240 /** 2241 * Find the statement that contains a column reference. 2242 * First tries to use the scope information (more reliable), then falls back to AST traversal. 2243 * For PL/SQL blocks, searches for the innermost DML statement that contains the column. 
2244 */ 2245 private TCustomSqlStatement findContainingStatement(TObjectName column) { 2246 // First, try to use the scope information from columnToScopeMap 2247 // The scope's node is typically the containing statement 2248 IScope scope = columnToScopeMap.get(column); 2249 if (scope != null) { 2250 TParseTreeNode scopeNode = scope.getNode(); 2251 if (scopeNode instanceof TCustomSqlStatement) { 2252 TCustomSqlStatement stmt = (TCustomSqlStatement) scopeNode; 2253 // If the scope is a PL/SQL block or procedure, search for DML statements within it 2254 // that actually contain the column (by line number) 2255 if (isPLSQLBlockStatement(stmt)) { 2256 TCustomSqlStatement dmlStmt = findDMLStatementContaining(stmt, column); 2257 if (dmlStmt != null) { 2258 return dmlStmt; 2259 } 2260 } 2261 return stmt; 2262 } 2263 } 2264 2265 // Fallback: traverse up the AST to find the nearest TCustomSqlStatement parent 2266 TParseTreeNode node = column; 2267 while (node != null) { 2268 if (node instanceof TCustomSqlStatement) { 2269 return (TCustomSqlStatement) node; 2270 } 2271 node = node.getParentObjectName(); 2272 } 2273 2274 // Last resort: search all statements for a DML statement containing the column 2275 TCustomSqlStatement result = null; 2276 if (sqlStatements.size() > 0) { 2277 for (int i = 0; i < sqlStatements.size(); i++) { 2278 TCustomSqlStatement stmt = sqlStatements.get(i); 2279 TCustomSqlStatement dmlStmt = findDMLStatementContaining(stmt, column); 2280 if (dmlStmt != null) { 2281 result = dmlStmt; 2282 break; 2283 } 2284 } 2285 if (result == null) { 2286 result = sqlStatements.get(0); 2287 } 2288 } 2289 return result; 2290 } 2291 2292 /** 2293 * Check if a statement is a PL/SQL block type statement. 
2294 */ 2295 private boolean isPLSQLBlockStatement(TCustomSqlStatement stmt) { 2296 if (stmt == null) return false; 2297 String className = stmt.getClass().getSimpleName(); 2298 return className.startsWith("TPlsql") || className.startsWith("TPLSql") || 2299 className.contains("Block") || className.contains("Procedure") || 2300 className.contains("Function") || className.contains("Package"); 2301 } 2302 2303 /** 2304 * DML Statement Range for efficient line-based lookup. 2305 * Used by the DML index cache (Performance Optimization A). 2306 */ 2307 private static class DmlRange implements Comparable<DmlRange> { 2308 final long startLine; 2309 final long endLine; 2310 final TCustomSqlStatement stmt; 2311 2312 DmlRange(TCustomSqlStatement stmt) { 2313 this.stmt = stmt; 2314 this.startLine = stmt.getStartToken() != null ? stmt.getStartToken().lineNo : -1; 2315 this.endLine = stmt.getEndToken() != null ? stmt.getEndToken().lineNo : -1; 2316 } 2317 2318 boolean contains(long line) { 2319 return startLine >= 0 && startLine <= line && line <= endLine; 2320 } 2321 2322 // Sort by startLine for binary search 2323 @Override 2324 public int compareTo(DmlRange other) { 2325 return Long.compare(this.startLine, other.startLine); 2326 } 2327 } 2328 2329 /** 2330 * Cache for DML statement ranges per parent statement (Performance Optimization A). 2331 * Built lazily on first access, cleared at start of each resolve() call. 2332 * Uses IdentityHashMap because we need object identity, not equals(). 2333 */ 2334 private final Map<TCustomSqlStatement, List<DmlRange>> dmlIndexCache = new IdentityHashMap<>(); 2335 2336 /** 2337 * Build DML index for a parent statement. 
2338 */ 2339 private List<DmlRange> buildDmlIndex(TCustomSqlStatement parent) { 2340 final List<DmlRange> ranges = new ArrayList<>(); 2341 parent.acceptChildren(new TParseTreeVisitor() { 2342 @Override 2343 public void preVisit(TInsertSqlStatement stmt) { 2344 ranges.add(new DmlRange(stmt)); 2345 } 2346 @Override 2347 public void preVisit(TUpdateSqlStatement stmt) { 2348 ranges.add(new DmlRange(stmt)); 2349 } 2350 @Override 2351 public void preVisit(TDeleteSqlStatement stmt) { 2352 ranges.add(new DmlRange(stmt)); 2353 } 2354 @Override 2355 public void preVisit(TSelectSqlStatement stmt) { 2356 ranges.add(new DmlRange(stmt)); 2357 } 2358 }); 2359 // Sort by startLine for efficient lookup 2360 java.util.Collections.sort(ranges); 2361 return ranges; 2362 } 2363 2364 /** 2365 * Get or build the DML index for a parent statement (Performance Optimization A). 2366 */ 2367 private List<DmlRange> getDmlIndex(TCustomSqlStatement parent) { 2368 return dmlIndexCache.computeIfAbsent(parent, this::buildDmlIndex); 2369 } 2370 2371 /** 2372 * Find the innermost DML statement (INSERT/UPDATE/DELETE/SELECT) within a parent statement 2373 * that contains the given column reference (by line number range). 2374 * Uses cached DML index for O(log N) lookup instead of O(N) traversal. 
2375 */ 2376 private TCustomSqlStatement findDMLStatementContaining(TCustomSqlStatement parent, TObjectName column) { 2377 if (parent == null || column == null) return null; 2378 2379 long columnLine = column.getLineNo(); 2380 TCustomSqlStatement result = null; 2381 2382 // Use cached DML index (Performance Optimization A) 2383 List<DmlRange> ranges = getDmlIndex(parent); 2384 2385 // Find all DML statements that contain the column by line number 2386 // Need to check all ranges that could contain the column (can't use pure binary search 2387 // because ranges can overlap and we want the innermost one) 2388 for (DmlRange range : ranges) { 2389 // Optimization: if startLine > columnLine, no more ranges can contain it 2390 if (range.startLine > columnLine) { 2391 break; 2392 } 2393 if (range.contains(columnLine)) { 2394 // Found a matching DML statement - prefer the innermost one (later startLine) 2395 if (result == null || 2396 (range.startLine >= result.getStartToken().lineNo)) { 2397 result = range.stmt; 2398 } 2399 } 2400 } 2401 2402 return result; 2403 } 2404 2405 /** 2406 * Sync implicit database/schema from USE DATABASE/USE SCHEMA statements to AST. 2407 * This enables TObjectName.getAnsiSchemaName() and getAnsiCatalogName() to work correctly 2408 * for unqualified object names. 2409 * 2410 * This is similar to what TDatabaseObjectResolver does in the legacy resolver: 2411 * it visits all TObjectName nodes and sets implicitDatabaseName/implicitSchemaName 2412 * based on the current database/schema context. 
2413 */ 2414 private void syncImplicitDbSchemaToAST() { 2415 // Get the tracked database context 2416 TSQLEnv env = getSqlEnv(); 2417 if (env == null) { 2418 return; 2419 } 2420 2421 String defaultCatalog = env.getDefaultCatalogName(); 2422 String defaultSchema = env.getDefaultSchemaName(); 2423 2424 // If no defaults are set, nothing to sync 2425 if ((defaultCatalog == null || defaultCatalog.isEmpty()) && 2426 (defaultSchema == null || defaultSchema.isEmpty())) { 2427 return; 2428 } 2429 2430 logDebug("Syncing implicit DB/schema to AST: catalog=" + defaultCatalog + ", schema=" + defaultSchema); 2431 2432 // Visit all statements and set implicit names on TObjectName nodes 2433 for (int i = 0; i < sqlStatements.size(); i++) { 2434 TCustomSqlStatement stmt = sqlStatements.get(i); 2435 if (stmt != null) { 2436 stmt.acceptChildren(new ImplicitDbSchemaVisitor(defaultCatalog, defaultSchema)); 2437 } 2438 } 2439 } 2440 2441 /** 2442 * Visitor to set implicit database/schema on TObjectName nodes. 
2443 */ 2444 private static class ImplicitDbSchemaVisitor extends TParseTreeVisitor { 2445 private final String defaultCatalog; 2446 private final String defaultSchema; 2447 2448 public ImplicitDbSchemaVisitor(String defaultCatalog, String defaultSchema) { 2449 this.defaultCatalog = defaultCatalog; 2450 this.defaultSchema = defaultSchema; 2451 } 2452 2453 @Override 2454 public void preVisit(TObjectName node) { 2455 if (node == null) return; 2456 2457 // Skip column objects - they don't need implicit DB/schema 2458 if (node.getDbObjectType() == EDbObjectType.column) return; 2459 2460 // Skip objects with a db_link - they refer to remote databases 2461 // and should not inherit the current session's default schema/catalog 2462 if (node.getDblink() != null) return; 2463 2464 // Set default database name if not qualified 2465 if (defaultCatalog != null && !defaultCatalog.isEmpty() && node.getDatabaseToken() == null) { 2466 node.setImplictDatabaseName(defaultCatalog); 2467 } 2468 2469 // Set default schema name if not qualified 2470 if (defaultSchema != null && !defaultSchema.isEmpty() && node.getSchemaToken() == null) { 2471 node.setImplictSchemaName(defaultSchema); 2472 } 2473 } 2474 } 2475 2476 /** 2477 * Selectively clear orphan column syntax hints (sphint) based on TSQLResolver2 resolution. 2478 * 2479 * Phase 1 (linkColumnToTable during parsing) adds sphint hints for columns it can't resolve. 2480 * TSQLResolver2 should: 2481 * 1. KEEP sphint hints for columns that are in allColumnReferences with NOT_FOUND/AMBIGUOUS status 2482 * (these are genuinely orphan/ambiguous columns) 2483 * 2. CLEAR sphint hints for all other columns: 2484 * - Columns successfully resolved (EXACT_MATCH) 2485 * - Columns filtered out by ScopeBuilder (package constants, function keywords, etc.) 2486 * - Columns in contexts TSQLResolver2 doesn't collect (MERGE VALUES, etc.) 
2487 */ 2488 private void clearOrphanColumnSyntaxHints() { 2489 // Build a set of positions for columns that should KEEP their sphint hints 2490 // These are columns in allColumnReferences with NOT_FOUND or AMBIGUOUS status 2491 Set<String> orphanPositions = new HashSet<>(); 2492 2493 for (TObjectName col : allColumnReferences) { 2494 if (col == null) continue; 2495 gudusoft.gsqlparser.resolver2.model.ResolutionResult resolution = col.getResolution(); 2496 if (resolution != null) { 2497 ResolutionStatus status = resolution.getStatus(); 2498 // Only keep sphint for genuinely AMBIGUOUS columns 2499 // NOT_FOUND columns might be due to TSQLResolver2 scope issues (e.g., MERGE WHEN clause) 2500 // so we clear their sphint to match old resolver behavior 2501 if (status == ResolutionStatus.AMBIGUOUS) { 2502 TSourceToken startToken = col.getStartToken(); 2503 if (startToken != null) { 2504 String key = startToken.lineNo + ":" + startToken.columnNo; 2505 orphanPositions.add(key); 2506 } 2507 } 2508 } 2509 } 2510 2511 // Clear sphint hints for positions NOT in orphanPositions 2512 for (int i = 0; i < sqlStatements.size(); i++) { 2513 TCustomSqlStatement stmt = sqlStatements.get(i); 2514 if (stmt == null) continue; 2515 clearNonOrphanSphintHintsRecursive(stmt, orphanPositions); 2516 } 2517 } 2518 2519 /** 2520 * Recursively clear sphint hints except for genuinely orphan columns. 
2521 */ 2522 private void clearNonOrphanSphintHintsRecursive(TCustomSqlStatement stmt, Set<String> orphanPositions) { 2523 if (stmt == null) return; 2524 2525 // Clear sphint hints that are NOT for genuinely orphan columns 2526 if (stmt.getSyntaxHints() != null && stmt.getSyntaxHints().size() > 0) { 2527 for (int j = stmt.getSyntaxHints().size() - 1; j >= 0; j--) { 2528 TSyntaxError syntaxError = stmt.getSyntaxHints().get(j); 2529 if (syntaxError.errortype == EErrorType.sphint) { 2530 String key = syntaxError.lineNo + ":" + syntaxError.columnNo; 2531 if (!orphanPositions.contains(key)) { 2532 // This sphint is NOT for a genuinely orphan column - clear it 2533 stmt.getSyntaxHints().remove(j); 2534 logDebug("Cleared sphint at line " + syntaxError.lineNo); 2535 } 2536 // Keep sphint hints for genuinely orphan columns (in orphanPositions) 2537 } 2538 } 2539 } 2540 2541 // Note: orphanColumns is populated by populateOrphanColumns() in Phase 4b 2542 // DO NOT clear it here - TGetTableColumn relies on orphanColumns for 2543 // linkOrphanColumnToFirstTable functionality 2544 2545 // Process nested statements 2546 for (int k = 0; k < stmt.getStatements().size(); k++) { 2547 clearNonOrphanSphintHintsRecursive(stmt.getStatements().get(k), orphanPositions); 2548 } 2549 } 2550 2551 2552 2553 /** 2554 * Filter UNNEST table's linkedColumns to keep only legitimate columns. 2555 * Phase 1 (linkColumnToTable) may incorrectly link external variables to UNNEST 2556 * when UNNEST is the only table in scope. This method removes such incorrect links. 2557 * 2558 * Legitimate columns for UNNEST: 2559 * - Implicit column: the alias (e.g., "arry_pair" from "UNNEST(...) 
AS arry_pair") 2560 * - WITH OFFSET column (e.g., "pos" from "WITH OFFSET AS pos") 2561 * - Derived struct field columns (from UNNEST of STRUCT arrays) 2562 */ 2563 private void filterUnnestLinkedColumns(TTable unnestTable) { 2564 if (unnestTable == null || unnestTable.getTableType() != ETableSource.unnest) { 2565 return; 2566 } 2567 2568 TObjectNameList linkedColumns = unnestTable.getLinkedColumns(); 2569 if (linkedColumns == null || linkedColumns.size() == 0) { 2570 return; 2571 } 2572 2573 // Build set of legitimate column names 2574 java.util.Set<String> legitimateNames = new java.util.HashSet<>(); 2575 2576 // 1. Implicit column (alias name) 2577 String aliasName = unnestTable.getAliasName(); 2578 if (aliasName != null && !aliasName.isEmpty()) { 2579 legitimateNames.add(aliasName.toUpperCase()); 2580 } 2581 2582 // 2. WITH OFFSET column 2583 TUnnestClause unnestClause = unnestTable.getUnnestClause(); 2584 if (unnestClause != null && unnestClause.getWithOffset() != null) { 2585 if (unnestClause.getWithOffsetAlais() != null && 2586 unnestClause.getWithOffsetAlais().getAliasName() != null) { 2587 legitimateNames.add(unnestClause.getWithOffsetAlais().getAliasName().toString().toUpperCase()); 2588 } else { 2589 legitimateNames.add("OFFSET"); 2590 } 2591 } 2592 2593 // 3. Derived struct field columns 2594 if (unnestClause != null && unnestClause.getDerivedColumnList() != null) { 2595 for (int i = 0; i < unnestClause.getDerivedColumnList().size(); i++) { 2596 TObjectName derivedCol = unnestClause.getDerivedColumnList().getObjectName(i); 2597 if (derivedCol != null) { 2598 legitimateNames.add(derivedCol.toString().toUpperCase()); 2599 } 2600 } 2601 } 2602 2603 // 4. Explicit alias columns (Presto/Trino syntax: UNNEST(...) 
AS t(col1, col2)) 2604 if (unnestTable.getAliasClause() != null && 2605 unnestTable.getAliasClause().getColumns() != null) { 2606 for (int i = 0; i < unnestTable.getAliasClause().getColumns().size(); i++) { 2607 TObjectName colName = unnestTable.getAliasClause().getColumns().getObjectName(i); 2608 if (colName != null) { 2609 legitimateNames.add(colName.toString().toUpperCase()); 2610 } 2611 } 2612 } 2613 2614 // Collect columns to keep 2615 java.util.List<TObjectName> toKeep = new java.util.ArrayList<>(); 2616 for (int i = 0; i < linkedColumns.size(); i++) { 2617 TObjectName col = linkedColumns.getObjectName(i); 2618 if (col != null) { 2619 String colName = col.getColumnNameOnly(); 2620 if (colName != null && legitimateNames.contains(colName.toUpperCase())) { 2621 toKeep.add(col); 2622 } 2623 } 2624 } 2625 2626 // Clear and re-add only legitimate columns 2627 linkedColumns.clear(); 2628 for (TObjectName col : toKeep) { 2629 linkedColumns.addObjectName(col); 2630 } 2631 } 2632 2633 /** 2634 * Clear linkedColumns on all tables in all statements. 2635 */ 2636 private void clearAllLinkedColumns() { 2637 // Use a set to track processed statements and avoid processing duplicates 2638 // This is important when processing subqueries within tables, as the same 2639 // subquery might be reachable from multiple paths 2640 java.util.Set<TCustomSqlStatement> processed = new java.util.HashSet<>(); 2641 for (int i = 0; i < sqlStatements.size(); i++) { 2642 clearLinkedColumnsRecursive(sqlStatements.get(i), processed); 2643 } 2644 } 2645 2646 /** 2647 * Recursively clear orphanColumns on statements. 2648 * These will be repopulated with genuinely unresolved columns in Phase 4b. 
2649 */ 2650 private void clearOrphanColumnsRecursive(TCustomSqlStatement stmt) { 2651 if (stmt == null) return; 2652 2653 if (stmt.getOrphanColumns() != null) { 2654 stmt.getOrphanColumns().clear(); 2655 } 2656 2657 // Process nested statements 2658 for (int i = 0; i < stmt.getStatements().size(); i++) { 2659 clearOrphanColumnsRecursive(stmt.getStatements().get(i)); 2660 } 2661 2662 // Also handle stored procedure/function body statements 2663 if (stmt instanceof gudusoft.gsqlparser.stmt.TStoredProcedureSqlStatement) { 2664 gudusoft.gsqlparser.stmt.TStoredProcedureSqlStatement sp = 2665 (gudusoft.gsqlparser.stmt.TStoredProcedureSqlStatement) stmt; 2666 for (int i = 0; i < sp.getBodyStatements().size(); i++) { 2667 clearOrphanColumnsRecursive(sp.getBodyStatements().get(i)); 2668 } 2669 } 2670 } 2671 2672 private void clearLinkedColumnsRecursive(TCustomSqlStatement stmt, java.util.Set<TCustomSqlStatement> processed) { 2673 if (stmt == null) return; 2674 2675 // Skip if already processed to avoid redundant work and potential infinite loops 2676 if (processed.contains(stmt)) { 2677 return; 2678 } 2679 processed.add(stmt); 2680 2681 // Skip DAX statements - they populate their own linkedColumns during parsing 2682 // via TDaxFunction.doParse() which calls psql.linkColumnToTable() directly. 2683 // TSQLResolver2's ScopeBuilder doesn't traverse DAX expressions, so we must 2684 // preserve the linkedColumns that DAX parsing already established. 2685 if (stmt instanceof TDaxStmt) { 2686 return; 2687 } 2688 2689 // Skip ALTER TABLE statements - they populate linkedColumns during parsing 2690 // via TAlterTableOption.doParse() which directly adds columns to the target table's 2691 // linkedColumns. TSQLResolver2's ScopeBuilder doesn't traverse these option nodes, 2692 // so we must preserve the linkedColumns that parsing already established. 
2693 if (stmt instanceof TAlterTableStatement) { 2694 return; 2695 } 2696 2697 // For CREATE TABLE statements, we need special handling: 2698 // - Regular CREATE TABLE (with column definitions): Preserve constraint columns 2699 // populated during TConstraint.doParse() 2700 // - CTAS (CREATE TABLE AS SELECT): Filter out source columns incorrectly added 2701 // to target table, but preserve the correctly created alias columns 2702 boolean isCreateTable = (stmt instanceof TCreateTableSqlStatement); 2703 if (isCreateTable) { 2704 TCreateTableSqlStatement ctas = (TCreateTableSqlStatement) stmt; 2705 boolean isCTAS = (ctas.getSubQuery() != null); 2706 // For CTAS, filter out source columns from target table's linkedColumns 2707 // The old resolver incorrectly adds source columns (from the SELECT) to the target table 2708 // Keep only columns whose sourceTable is the target table itself 2709 if (isCTAS && ctas.getTargetTable() != null) { 2710 TTable targetTable = ctas.getTargetTable(); 2711 TObjectNameList linkedColumns = targetTable.getLinkedColumns(); 2712 if (linkedColumns != null && linkedColumns.size() > 0) { 2713 // Collect columns to keep (those belonging to target table) 2714 java.util.List<TObjectName> toKeep = new java.util.ArrayList<>(); 2715 for (int i = 0; i < linkedColumns.size(); i++) { 2716 TObjectName col = linkedColumns.getObjectName(i); 2717 if (col != null && col.getSourceTable() == targetTable) { 2718 toKeep.add(col); 2719 } 2720 } 2721 // Clear and re-add only the columns to keep 2722 linkedColumns.clear(); 2723 for (TObjectName col : toKeep) { 2724 linkedColumns.addObjectName(col); 2725 } 2726 } 2727 } 2728 } 2729 2730 if (!isCreateTable && stmt.tables != null) { 2731 // Check if this statement contains a TD_UNPIVOT table 2732 // TD_UNPIVOT populates linkedColumns on its inner table during TTDUnpivot.doParse() 2733 // If we clear linkedColumns here, we lose those column references 2734 boolean hasTDUnpivot = false; 2735 for (int i = 0; i < 
stmt.tables.size(); i++) { 2736 TTable table = stmt.tables.getTable(i); 2737 if (table != null && table.getTableType() == ETableSource.td_unpivot) { 2738 hasTDUnpivot = true; 2739 break; 2740 } 2741 } 2742 2743 for (int i = 0; i < stmt.tables.size(); i++) { 2744 TTable table = stmt.tables.getTable(i); 2745 if (table != null && table.getLinkedColumns() != null) { 2746 // For UNNEST tables, filter out incorrectly linked columns from Phase 1. 2747 // Phase 1 (linkColumnToTable) may have linked external variables to UNNEST 2748 // when it's the only table in scope. Keep only legitimate columns: 2749 // - Implicit column (the UNNEST alias, e.g., "arry_pair" from "UNNEST(...) AS arry_pair") 2750 // - WITH OFFSET column (e.g., "pos" from "WITH OFFSET AS pos") 2751 if (table.getTableType() == ETableSource.unnest) { 2752 filterUnnestLinkedColumns(table); 2753 continue; 2754 } 2755 // Skip TD_UNPIVOT tables - they don't have their own columns but 2756 // TTDUnpivot.doParse() populates columns on the inner table 2757 if (table.getTableType() == ETableSource.td_unpivot) { 2758 continue; 2759 } 2760 // If this statement contains TD_UNPIVOT, skip clearing all tables 2761 // because TD_UNPIVOT populates linkedColumns on inner tables 2762 if (hasTDUnpivot) { 2763 continue; 2764 } 2765 table.getLinkedColumns().clear(); 2766 } 2767 } 2768 } 2769 2770 // Skip recursive processing if this statement contains TD_UNPIVOT 2771 // TD_UNPIVOT's inner table (in the ON clause) has columns populated during parsing 2772 // and those columns need to be preserved 2773 boolean hasTDUnpivot = false; 2774 if (stmt.tables != null) { 2775 for (int i = 0; i < stmt.tables.size(); i++) { 2776 TTable table = stmt.tables.getTable(i); 2777 if (table != null && table.getTableType() == ETableSource.td_unpivot) { 2778 hasTDUnpivot = true; 2779 break; 2780 } 2781 } 2782 } 2783 2784 if (!hasTDUnpivot) { 2785 for (int i = 0; i < stmt.getStatements().size(); i++) { 2786 
clearLinkedColumnsRecursive(stmt.getStatements().get(i), processed); 2787 } 2788 2789 // Also process subqueries within tables - these are NOT in getStatements() 2790 // but are accessed via table.getSubquery() 2791 if (stmt.tables != null) { 2792 for (int i = 0; i < stmt.tables.size(); i++) { 2793 TTable table = stmt.tables.getTable(i); 2794 if (table != null && table.getSubquery() != null) { 2795 clearLinkedColumnsRecursive(table.getSubquery(), processed); 2796 } 2797 } 2798 } 2799 } 2800 } 2801 2802 /** 2803 * Recursively fill TTable.getAttributes() for all tables in a statement. 2804 * Uses namespace data already collected during name resolution. 2805 * 2806 * Processing order is important: 2807 * 1. Process CTEs first 2808 * 2. Process leaf tables (objectname, function, etc.) - not JOIN or subquery 2809 * 3. Process subqueries (recursively) 2810 * 4. Process JOIN tables last (they depend on child tables having attributes) 2811 */ 2812 private void fillTableAttributesRecursive(TCustomSqlStatement stmt, Set<TTable> processedTables) { 2813 if (stmt == null) return; 2814 2815 // Skip DAX statements - they use their own attribute/linkedColumn mechanism 2816 // established during TDaxFunction.doParse() parsing phase. 2817 if (stmt instanceof TDaxStmt) { 2818 return; 2819 } 2820 2821 // Skip ALTER TABLE statements - they use their own linkedColumn mechanism 2822 // established during TAlterTableOption.doParse() parsing phase. 2823 if (stmt instanceof TAlterTableStatement) { 2824 return; 2825 } 2826 2827 // Skip CREATE TABLE statements - they use their own linkedColumn mechanism 2828 // established during TConstraint.doParse() parsing phase. 
2829 if (stmt instanceof TCreateTableSqlStatement) { 2830 return; 2831 } 2832 2833 // Phase 1: Process CTE tables first 2834 if (stmt instanceof TSelectSqlStatement) { 2835 TSelectSqlStatement selectStmt = (TSelectSqlStatement) stmt; 2836 TCTEList cteList = selectStmt.getCteList(); 2837 if (cteList != null) { 2838 for (int i = 0; i < cteList.size(); i++) { 2839 TCTE cte = cteList.getCTE(i); 2840 if (cte != null && cte.getSubquery() != null) { 2841 fillTableAttributesRecursive(cte.getSubquery(), processedTables); 2842 } 2843 } 2844 } 2845 } 2846 2847 // Collect tables by type for proper processing order 2848 List<TTable> leafTables = new ArrayList<>(); 2849 List<TTable> subqueryTables = new ArrayList<>(); 2850 List<TTable> joinTables = new ArrayList<>(); 2851 2852 // First, collect from stmt.tables 2853 if (stmt.tables != null) { 2854 for (int i = 0; i < stmt.tables.size(); i++) { 2855 TTable table = stmt.tables.getTable(i); 2856 if (table == null || processedTables.contains(table)) continue; 2857 2858 switch (table.getTableType()) { 2859 case join: 2860 joinTables.add(table); 2861 // Also collect nested tables within the join 2862 collectNestedJoinTables(table, leafTables, subqueryTables, joinTables, processedTables); 2863 break; 2864 case subquery: 2865 subqueryTables.add(table); 2866 break; 2867 default: 2868 leafTables.add(table); 2869 break; 2870 } 2871 } 2872 } 2873 2874 // Also collect from getRelations() - JOIN tables are often stored there 2875 if (stmt.getRelations() != null) { 2876 for (int i = 0; i < stmt.getRelations().size(); i++) { 2877 IRelation rel = stmt.getRelations().get(i); 2878 if (!(rel instanceof TTable)) continue; 2879 TTable table = (TTable) rel; 2880 if (processedTables.contains(table)) continue; 2881 2882 if (table.getTableType() == ETableSource.join) { 2883 if (!joinTables.contains(table)) { 2884 joinTables.add(table); 2885 // Also collect nested tables within the join 2886 collectNestedJoinTables(table, leafTables, subqueryTables, 
joinTables, processedTables); 2887 } 2888 } 2889 } 2890 } 2891 2892 // Phase 2: Process leaf tables first (objectname, function, xml, etc.) 2893 for (TTable table : leafTables) { 2894 if (!processedTables.contains(table)) { 2895 fillTableAttributes(table, processedTables, stmt); 2896 processedTables.add(table); 2897 } 2898 } 2899 2900 // Phase 3: Process subqueries (recursively process their contents first) 2901 for (TTable table : subqueryTables) { 2902 if (!processedTables.contains(table)) { 2903 if (table.getSubquery() != null) { 2904 fillTableAttributesRecursive(table.getSubquery(), processedTables); 2905 } 2906 fillTableAttributes(table, processedTables, stmt); 2907 processedTables.add(table); 2908 } 2909 } 2910 2911 // Phase 4: Process JOIN tables last (they need child tables to have attributes) 2912 for (TTable table : joinTables) { 2913 if (!processedTables.contains(table)) { 2914 fillTableAttributes(table, processedTables, stmt); 2915 processedTables.add(table); 2916 } 2917 } 2918 2919 // Process nested statements 2920 for (int i = 0; i < stmt.getStatements().size(); i++) { 2921 fillTableAttributesRecursive(stmt.getStatements().get(i), processedTables); 2922 } 2923 } 2924 2925 /** 2926 * Collect nested tables within a JOIN expression. 2927 * This ensures all component tables are processed before the JOIN itself. 
2928 */ 2929 private void collectNestedJoinTables(TTable joinTable, 2930 List<TTable> leafTables, 2931 List<TTable> subqueryTables, 2932 List<TTable> joinTables, 2933 Set<TTable> processedTables) { 2934 if (joinTable == null || joinTable.getJoinExpr() == null) return; 2935 2936 TJoinExpr joinExpr = joinTable.getJoinExpr(); 2937 2938 // Process left table 2939 TTable leftTable = joinExpr.getLeftTable(); 2940 if (leftTable != null && !processedTables.contains(leftTable)) { 2941 switch (leftTable.getTableType()) { 2942 case join: 2943 joinTables.add(leftTable); 2944 collectNestedJoinTables(leftTable, leafTables, subqueryTables, joinTables, processedTables); 2945 break; 2946 case subquery: 2947 subqueryTables.add(leftTable); 2948 break; 2949 default: 2950 leafTables.add(leftTable); 2951 break; 2952 } 2953 } 2954 2955 // Process right table 2956 TTable rightTable = joinExpr.getRightTable(); 2957 if (rightTable != null && !processedTables.contains(rightTable)) { 2958 switch (rightTable.getTableType()) { 2959 case join: 2960 joinTables.add(rightTable); 2961 collectNestedJoinTables(rightTable, leafTables, subqueryTables, joinTables, processedTables); 2962 break; 2963 case subquery: 2964 subqueryTables.add(rightTable); 2965 break; 2966 default: 2967 leafTables.add(rightTable); 2968 break; 2969 } 2970 } 2971 } 2972 2973 /** 2974 * Fill TTable.getAttributes() for a single table using namespace data. 2975 * This converts the namespace's columnSources to TAttributeNode objects. 
2976 * 2977 * @param table The table to fill attributes for 2978 * @param processedTables Set of already processed tables to avoid duplicates 2979 * @param stmt The statement context (used for UNNEST to get the SELECT statement) 2980 */ 2981 private void fillTableAttributes(TTable table, Set<TTable> processedTables, TCustomSqlStatement stmt) { 2982 if (table == null) return; 2983 2984 // Clear existing attributes 2985 table.getAttributes().clear(); 2986 2987 String displayName = table.getDisplayName(true); 2988 if (displayName == null || displayName.isEmpty()) { 2989 displayName = table.getAliasName(); 2990 if (displayName == null || displayName.isEmpty()) { 2991 displayName = table.getName(); 2992 } 2993 } 2994 2995 // First, try to use existing namespace from ScopeBuildResult 2996 // Skip namespace lookup for UNNEST tables - they need special handling via initAttributesForUnnest 2997 INamespace existingNamespace = null; 2998 if (table.getTableType() != ETableSource.unnest) { 2999 existingNamespace = scopeBuildResult != null 3000 ? 
scopeBuildResult.getNamespaceForTable(table) 3001 : null; 3002 } 3003 3004 if (existingNamespace != null) { 3005 // Use existing namespace's column sources 3006 // Returns false if namespace has no real metadata (only inferred columns) 3007 if (fillAttributesFromNamespace(table, existingNamespace, displayName)) { 3008 return; 3009 } 3010 // Fall through to legacy logic if no real metadata 3011 } 3012 3013 // Fall back to type-specific handling if no namespace found 3014 switch (table.getTableType()) { 3015 case objectname: 3016 if (table.isCTEName()) { 3017 // CTE reference - use initAttributesFromCTE 3018 TCTE cte = table.getCTE(); 3019 if (cte != null) { 3020 table.initAttributesFromCTE(cte); 3021 } 3022 } else { 3023 // Physical table - create TableNamespace and extract columns 3024 fillPhysicalTableAttributes(table, displayName); 3025 } 3026 break; 3027 3028 case subquery: 3029 // Subquery - use initAttributesFromSubquery 3030 if (table.getSubquery() != null) { 3031 String prefix = ""; 3032 if (table.getAliasClause() != null) { 3033 prefix = table.getAliasClause().toString() + "."; 3034 } 3035 table.initAttributesFromSubquery(table.getSubquery(), prefix); 3036 } 3037 break; 3038 3039 case join: 3040 // JOIN - combine attributes from left and right tables 3041 // First, add USING columns to the left and right tables (if present) 3042 if (table.getJoinExpr() != null) { 3043 addUsingColumnsToTables(table.getJoinExpr()); 3044 // Then initialize the join expression's attributes (which pulls from left/right tables) 3045 table.getJoinExpr().initAttributes(0); 3046 } 3047 table.initAttributesForJoin(); 3048 break; 3049 3050 case function: 3051 // Table function 3052 table.initAttributeForTableFunction(); 3053 break; 3054 3055 case xmltable: 3056 // XML table 3057 table.initAttributeForXMLTable(); 3058 break; 3059 3060 case tableExpr: 3061 // Table expression 3062 TAttributeNode.addNodeToList( 3063 new TAttributeNode(displayName + ".*", table), 3064 
table.getAttributes() 3065 ); 3066 break; 3067 3068 case rowList: 3069 // Row list 3070 table.initAttributeForRowList(); 3071 break; 3072 3073 case unnest: 3074 // UNNEST - initialize attributes using the SELECT statement context 3075 if (stmt instanceof TSelectSqlStatement) { 3076 TSelectSqlStatement select = (TSelectSqlStatement) stmt; 3077 table.initAttributesForUnnest(getSqlEnv(), select); 3078 } 3079 break; 3080 3081 case pivoted_table: 3082 // PIVOT table 3083 table.initAttributesForPivotTable(); 3084 break; 3085 } 3086 } 3087 3088 /** 3089 * Fill table attributes from an existing namespace's column sources. 3090 * This uses the namespace data that was collected during ScopeBuilder traversal. 3091 * 3092 * @return true if attributes were successfully filled, false if should fall back to legacy logic 3093 */ 3094 private boolean fillAttributesFromNamespace(TTable table, INamespace namespace, String displayName) { 3095 // Ensure namespace is validated 3096 if (!namespace.isValidated()) { 3097 namespace.validate(); 3098 } 3099 3100 // For TableNamespace without actual metadata (only inferred columns), 3101 // return false to fall back to legacy logic which uses wildcards 3102 if (namespace instanceof TableNamespace) { 3103 TableNamespace tableNs = (TableNamespace) namespace; 3104 // Check if the namespace has actual metadata by seeing if there are any columns 3105 // with high confidence from metadata sources (not inferred) 3106 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 3107 boolean hasRealMetadata = false; 3108 for (ColumnSource source : columnSources.values()) { 3109 if (source.getConfidence() >= 1.0 && 3110 !("inferred_from_usage".equals(source.getEvidence()))) { 3111 hasRealMetadata = true; 3112 break; 3113 } 3114 } 3115 if (!hasRealMetadata) { 3116 // No real metadata, fall back to legacy logic with wildcards 3117 return false; 3118 } 3119 3120 // Has metadata - use namespace columns 3121 for (Map.Entry<String, ColumnSource> 
entry : columnSources.entrySet()) { 3122 String colName = entry.getKey(); 3123 ColumnSource source = entry.getValue(); 3124 // Only include columns with real metadata, not inferred ones 3125 if (source.getConfidence() >= 1.0 && 3126 !("inferred_from_usage".equals(source.getEvidence()))) { 3127 TAttributeNode.addNodeToList( 3128 new TAttributeNode(displayName + "." + colName, table), 3129 table.getAttributes() 3130 ); 3131 } 3132 } 3133 3134 // If no columns after filtering, add wildcard 3135 if (table.getAttributes().isEmpty()) { 3136 TAttributeNode.addNodeToList( 3137 new TAttributeNode(displayName + ".*", table), 3138 table.getAttributes() 3139 ); 3140 } 3141 return true; 3142 } 3143 3144 // For other namespace types (SubqueryNamespace, CTENamespace, etc.), 3145 // use all column sources 3146 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 3147 if (columnSources != null && !columnSources.isEmpty()) { 3148 for (Map.Entry<String, ColumnSource> entry : columnSources.entrySet()) { 3149 String colName = entry.getKey(); 3150 TAttributeNode.addNodeToList( 3151 new TAttributeNode(displayName + "." + colName, table), 3152 table.getAttributes() 3153 ); 3154 } 3155 } 3156 3157 // If no columns found, add wildcard attribute 3158 if (table.getAttributes().isEmpty()) { 3159 TAttributeNode.addNodeToList( 3160 new TAttributeNode(displayName + ".*", table), 3161 table.getAttributes() 3162 ); 3163 } 3164 return true; 3165 } 3166 3167 /** 3168 * Fill attributes for a physical table using TableNamespace. 3169 */ 3170 private void fillPhysicalTableAttributes(TTable table, String displayName) { 3171 // Create namespace for this table with sqlEnv and vendor for qualified name resolution 3172 TSQLEnv sqlEnv = globalContext != null ? globalContext.getSqlEnv() : null; 3173 EDbVendor vendor = table.dbvendor != null ? 
table.dbvendor : EDbVendor.dbvoracle; 3174 TableNamespace namespace = new TableNamespace(table, config.getNameMatcher(), sqlEnv, vendor); 3175 3176 // Validate to populate columnSources 3177 namespace.validate(); 3178 3179 // Convert columnSources to TAttributeNode 3180 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 3181 if (columnSources != null && !columnSources.isEmpty()) { 3182 for (Map.Entry<String, ColumnSource> entry : columnSources.entrySet()) { 3183 String colName = entry.getKey(); 3184 TAttributeNode.addNodeToList( 3185 new TAttributeNode(displayName + "." + colName, table), 3186 table.getAttributes() 3187 ); 3188 } 3189 } 3190 3191 // If no columns found from metadata, add wildcard attribute 3192 // (this allows any column to potentially match) 3193 if (table.getAttributes().isEmpty()) { 3194 // Add columns from linkedColumns if available 3195 if (table.getLinkedColumns() != null && table.getLinkedColumns().size() > 0) { 3196 for (TObjectName col : table.getLinkedColumns()) { 3197 if (col.getCandidateTables() != null && col.getCandidateTables().size() > 1) { 3198 continue; // Skip ambiguous columns 3199 } 3200 TAttributeNode.addNodeToList( 3201 new TAttributeNode(displayName + "." + col.getColumnNameOnly(), table), 3202 table.getAttributes() 3203 ); 3204 } 3205 } 3206 // Add wildcard attribute 3207 TAttributeNode.addNodeToList( 3208 new TAttributeNode(displayName + ".*", table), 3209 table.getAttributes() 3210 ); 3211 } 3212 } 3213 3214 /** 3215 * Add USING columns to the left and right tables in a JOIN expression. 3216 * USING columns should appear in both tables' attribute lists before the wildcard. 3217 * This method recursively handles nested JOINs. 
3218 */ 3219 private void addUsingColumnsToTables(TJoinExpr joinExpr) { 3220 if (joinExpr == null) return; 3221 3222 // Recursively handle nested joins 3223 TTable leftTable = joinExpr.getLeftTable(); 3224 TTable rightTable = joinExpr.getRightTable(); 3225 3226 if (leftTable != null && leftTable.getTableType() == ETableSource.join && leftTable.getJoinExpr() != null) { 3227 addUsingColumnsToTables(leftTable.getJoinExpr()); 3228 } 3229 if (rightTable != null && rightTable.getTableType() == ETableSource.join && rightTable.getJoinExpr() != null) { 3230 addUsingColumnsToTables(rightTable.getJoinExpr()); 3231 } 3232 3233 // Handle USING columns in this join 3234 gudusoft.gsqlparser.nodes.TObjectNameList usingColumns = joinExpr.getUsingColumns(); 3235 if (usingColumns == null || usingColumns.size() == 0) return; 3236 3237 // Add USING columns to both tables 3238 for (int i = 0; i < usingColumns.size(); i++) { 3239 TObjectName usingCol = usingColumns.getObjectName(i); 3240 if (usingCol == null) continue; 3241 String colName = usingCol.getColumnNameOnly(); 3242 3243 // Add to left table (insert before wildcard if possible) 3244 if (leftTable != null && leftTable.getTableType() != ETableSource.join) { 3245 addColumnAttributeBeforeWildcard(leftTable, colName); 3246 } 3247 3248 // Add to right table (insert before wildcard if possible) 3249 if (rightTable != null && rightTable.getTableType() != ETableSource.join) { 3250 addColumnAttributeBeforeWildcard(rightTable, colName); 3251 } 3252 } 3253 } 3254 3255 /** 3256 * Add a column attribute to a table, inserting before the wildcard (*) if present. 3257 * This ensures USING columns appear before the wildcard in the attribute list. 
3258 */ 3259 private void addColumnAttributeBeforeWildcard(TTable table, String columnName) { 3260 if (table == null || columnName == null) return; 3261 3262 String displayName = table.getDisplayName(true); 3263 if (displayName == null || displayName.isEmpty()) { 3264 displayName = table.getAliasName(); 3265 if (displayName == null || displayName.isEmpty()) { 3266 displayName = table.getName(); 3267 } 3268 } 3269 3270 String attrName = displayName + "." + columnName; 3271 3272 // Check if attribute already exists 3273 ArrayList<TAttributeNode> attrs = table.getAttributes(); 3274 for (TAttributeNode attr : attrs) { 3275 if (attr.getName().equalsIgnoreCase(attrName)) { 3276 return; // Already exists 3277 } 3278 } 3279 3280 // Find the wildcard position 3281 int wildcardIndex = -1; 3282 for (int i = 0; i < attrs.size(); i++) { 3283 if (attrs.get(i).getName().endsWith(".*")) { 3284 wildcardIndex = i; 3285 break; 3286 } 3287 } 3288 3289 // Insert before wildcard or add to end 3290 TAttributeNode newAttr = new TAttributeNode(attrName, table); 3291 if (wildcardIndex >= 0) { 3292 attrs.add(wildcardIndex, newAttr); 3293 } else { 3294 TAttributeNode.addNodeToList(newAttr, attrs); 3295 } 3296 } 3297 3298 /** 3299 * Sync a single column to legacy structures. 3300 * @return true if column was synced (had a sourceTable) 3301 */ 3302 private boolean syncColumnToLegacy(TObjectName column) { 3303 if (column == null) return false; 3304 3305 // Special handling for star columns (SELECT *) 3306 // Star columns represent ALL tables in the FROM clause and should be synced to ALL tables 3307 // in their sourceTableList, not just the first one. 
3308 String columnName = column.getColumnNameOnly(); 3309 if (columnName != null && columnName.equals("*")) { 3310 java.util.ArrayList<TTable> sourceTableList = column.getSourceTableList(); 3311 if (sourceTableList != null && sourceTableList.size() > 0) { 3312 boolean synced = false; 3313 for (TTable starTable : sourceTableList) { 3314 if (starTable == null) continue; 3315 // Skip subquery types - the star should be linked to physical tables 3316 if (starTable.getTableType() == ETableSource.subquery) continue; 3317 gudusoft.gsqlparser.nodes.TObjectNameList starLinkedColumns = starTable.getLinkedColumns(); 3318 if (starLinkedColumns != null && !containsColumn(starLinkedColumns, column)) { 3319 starLinkedColumns.addObjectName(column); 3320 synced = true; 3321 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3322 logInfo("syncColumnToLegacy: Synced star column to sourceTableList table: " 3323 + starTable.getTableName()); 3324 } 3325 } 3326 } 3327 return synced; 3328 } 3329 } 3330 3331 // Check if column is AMBIGUOUS - don't sync to legacy if it's ambiguous 3332 // Ambiguous columns should be added to orphanColumns, not linkedColumns 3333 // NOTE: Skip this check for star columns (*) since they are handled specially 3334 // via sourceTableList and should be linked to all tables in the FROM clause 3335 ResolutionResult resolution = column.getResolution(); 3336 if (resolution != null && resolution.getStatus() == ResolutionStatus.AMBIGUOUS) { 3337 // Don't treat star columns as ambiguous - they're supposed to match all tables 3338 if (columnName != null && columnName.equals("*")) { 3339 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3340 logInfo("syncColumnToLegacy: Star column has AMBIGUOUS status, proceeding with normal sync"); 3341 } 3342 // Fall through to normal processing 3343 } else { 3344 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3345 logInfo("syncColumnToLegacy: Skipping AMBIGUOUS column: " + column.toString() 3346 + " with " + (resolution.getAmbiguousSource() != 
null ? 3347 resolution.getAmbiguousSource().getCandidateCount() : 0) + " candidates"); 3348 } 3349 // Clear sourceTable if it was set by Phase 1 (linkColumnToTable) 3350 // This ensures the column will be treated as orphan by TGetTableColumn 3351 if (column.getSourceTable() != null) { 3352 column.setSourceTable(null); 3353 } 3354 return false; 3355 } 3356 } 3357 3358 TTable sourceTable = column.getSourceTable(); 3359 ColumnSource source = column.getColumnSource(); 3360 3361 // Handle columns resolved through PlsqlVariableNamespace 3362 // These are stored procedure variables/parameters - mark them as variables 3363 // so they won't be added to orphan columns 3364 if (source != null && source.getSourceNamespace() instanceof gudusoft.gsqlparser.resolver2.namespace.PlsqlVariableNamespace) { 3365 column.setDbObjectTypeDirectly(EDbObjectType.variable); 3366 // Variables don't need to be linked to tables 3367 return false; 3368 } 3369 3370 // Fix for subquery columns: When a column is EXPLICITLY QUALIFIED with a subquery alias 3371 // (e.g., mm.material_id), the old resolver Phase 1 may have incorrectly set sourceTable 3372 // to the physical table inside the subquery. TSQLResolver2 should correct this to point 3373 // to the subquery TTable itself. This preserves the intermediate layer for data lineage: 3374 // mm.material_id -> subquery mm -> physical table 3375 // 3376 // IMPORTANT: Only apply this correction for QUALIFIED columns. Unqualified columns 3377 // (like those inferred from star column expansion) should keep their physical table 3378 // sourceTable for proper data lineage tracing. 
3379 if (source != null && column.isQualified()) { 3380 INamespace ns = source.getSourceNamespace(); 3381 if (ns instanceof SubqueryNamespace) { 3382 TTable subqueryTable = ns.getSourceTable(); 3383 // If the subquery's TTable is different from the current sourceTable, 3384 // use the subquery's TTable to maintain proper semantic layering 3385 if (subqueryTable != null && subqueryTable != sourceTable) { 3386 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3387 logInfo("syncColumnToLegacy: Correcting sourceTable from " + 3388 (sourceTable != null ? sourceTable.getTableName() : "null") + 3389 " to subquery " + subqueryTable.getTableName() + " for qualified column " + column.toString()); 3390 } 3391 sourceTable = subqueryTable; 3392 column.setSourceTable(sourceTable); 3393 } 3394 } 3395 } 3396 3397 // If sourceTable is null, try to get it from ColumnSource 3398 // This handles columns resolved to derived tables (subqueries with aliases) 3399 // where TSQLResolver2 resolved via ColumnSource but didn't set sourceTable on TObjectName 3400 if (sourceTable == null && source != null) { 3401 // For alias columns (isColumnAlias) or passthroughs to aliases (getFinalColumnName != null), 3402 // prefer the immediate source table (subquery/CTE) over the traced physical table. 3403 // The alias name doesn't exist in the physical table, so linking with alias name is wrong. 
3404 boolean isAliasColumn = source.isColumnAlias() || source.getFinalColumnName() != null; 3405 if (isAliasColumn) { 3406 INamespace ns = source.getSourceNamespace(); 3407 if (ns != null) { 3408 TTable immediateTable = ns.getSourceTable(); 3409 if (immediateTable != null) { 3410 sourceTable = immediateTable; 3411 column.setSourceTable(sourceTable); 3412 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3413 logInfo("syncColumnToLegacy: Set sourceTable to immediate source for alias column " 3414 + column.toString() + " -> " + immediateTable.getTableName()); 3415 } 3416 } 3417 } 3418 } 3419 if (sourceTable == null) { 3420 TTable finalTable = source.getFinalTable(); 3421 if (finalTable != null) { 3422 sourceTable = finalTable; 3423 column.setSourceTable(sourceTable); 3424 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3425 logInfo("syncColumnToLegacy: Set sourceTable from ColumnSource.getFinalTable() for " 3426 + column.toString() + " -> " + finalTable.getTableName()); 3427 } 3428 } else { 3429 // Try getAllFinalTables() - this may succeed when getFinalTable() returns null 3430 // For example, columns inferred through star push-down may have overrideTable set 3431 // which getAllFinalTables() will return as a single-element list 3432 java.util.List<TTable> allFinalTables = source.getAllFinalTables(); 3433 if (allFinalTables != null && !allFinalTables.isEmpty()) { 3434 // Use the first non-subquery table from allFinalTables 3435 for (TTable candidateTable : allFinalTables) { 3436 if (candidateTable != null && candidateTable.getTableType() != ETableSource.subquery) { 3437 sourceTable = candidateTable; 3438 column.setSourceTable(sourceTable); 3439 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3440 logInfo("syncColumnToLegacy: Set sourceTable from ColumnSource.getAllFinalTables() for " 3441 + column.toString() + " -> " + candidateTable.getTableName()); 3442 } 3443 break; 3444 } 3445 } 3446 } 3447 3448 // Fallback: try overrideTable for cases like derived tables in JOIN ON 
clauses 3449 if (sourceTable == null) { 3450 TTable overrideTable = source.getOverrideTable(); 3451 if (overrideTable != null) { 3452 sourceTable = overrideTable; 3453 column.setSourceTable(sourceTable); 3454 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3455 logInfo("syncColumnToLegacy: Set sourceTable from ColumnSource.getOverrideTable() for " 3456 + column.toString() + " -> " + overrideTable.getTableName()); 3457 } 3458 } 3459 } 3460 } 3461 } 3462 } 3463 3464 if (sourceTable == null) { 3465 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && source != null) { 3466 logInfo("syncColumnToLegacy: Column " + column.toString() 3467 + " has ColumnSource but no table. Namespace: " 3468 + (source.getSourceNamespace() != null ? source.getSourceNamespace().getClass().getSimpleName() : "null") 3469 + ", evidence: " + source.getEvidence()); 3470 } 3471 return false; 3472 } 3473 3474 // For struct-field access (e.g., customer.customer_id in BigQuery), 3475 // create a synthetic column representing the base column (e.g., "customer") 3476 // instead of using the original column which has the field name (e.g., "customer_id") 3477 if (source != null && source.isStructFieldAccess()) { 3478 String baseColumnName = source.getExposedName(); 3479 if (baseColumnName != null && !baseColumnName.isEmpty()) { 3480 // Create synthetic TObjectName for the base column 3481 EDbVendor vendor = config != null ? 
config.getVendor() : EDbVendor.dbvbigquery; 3482 TObjectName baseColumn = TObjectName.createObjectName( 3483 vendor, EDbObjectType.column, baseColumnName); 3484 baseColumn.setSourceTable(sourceTable); 3485 3486 // Add the base column to linkedColumns (avoid duplicates by name) 3487 gudusoft.gsqlparser.nodes.TObjectNameList linkedColumns = sourceTable.getLinkedColumns(); 3488 if (linkedColumns != null && !containsColumnByName(linkedColumns, baseColumnName)) { 3489 linkedColumns.addObjectName(baseColumn); 3490 } 3491 return true; // Skip adding the original struct-qualified column to linkedColumns. 3492 // DataFlowAnalyzer uses FieldPath from the original TObjectName to match 3493 // against the synthetic base column via getStructFieldFullName(). 3494 } 3495 } 3496 3497 // 1. Add to TTable.linkedColumns (avoid duplicates) 3498 gudusoft.gsqlparser.nodes.TObjectNameList linkedColumns = sourceTable.getLinkedColumns(); 3499 if (linkedColumns != null && !containsColumn(linkedColumns, column)) { 3500 linkedColumns.addObjectName(column); 3501 } 3502 3503 // 2. For UNION scenarios, also add to all final tables from UNION branches 3504 // This is critical for star column push-down tests that expect columns to be 3505 // linked to ALL tables in a UNION, not just the first one. 3506 if (source != null) { 3507 java.util.List<TTable> allFinalTables = source.getAllFinalTables(); 3508 if (allFinalTables != null && allFinalTables.size() > 1) { 3509 for (TTable unionTable : allFinalTables) { 3510 if (unionTable == null || unionTable == sourceTable) continue; 3511 // Skip subquery types - only link to physical tables 3512 if (unionTable.getTableType() == ETableSource.subquery) continue; 3513 gudusoft.gsqlparser.nodes.TObjectNameList unionLinkedColumns = unionTable.getLinkedColumns(); 3514 if (unionLinkedColumns != null && !containsColumn(unionLinkedColumns, column)) { 3515 unionLinkedColumns.addObjectName(column); 3516 } 3517 } 3518 } 3519 3520 // 2b. 
For CTE columns, also link to the CTE reference table 3521 // When a column is resolved through a CTE, it should be linked to both: 3522 // - The CTE reference table (immediate source) 3523 // - The underlying physical tables (final source) 3524 INamespace ns = source.getSourceNamespace(); 3525 if (ns instanceof gudusoft.gsqlparser.resolver2.namespace.CTENamespace) { 3526 gudusoft.gsqlparser.resolver2.namespace.CTENamespace cteNs = 3527 (gudusoft.gsqlparser.resolver2.namespace.CTENamespace) ns; 3528 TTable cteTable = cteNs.getReferencingTable(); 3529 if (cteTable != null && cteTable != sourceTable) { 3530 gudusoft.gsqlparser.nodes.TObjectNameList cteLinkedColumns = cteTable.getLinkedColumns(); 3531 if (cteLinkedColumns != null && !containsColumn(cteLinkedColumns, column)) { 3532 cteLinkedColumns.addObjectName(column); 3533 } 3534 } 3535 } 3536 3537 // 2c. For subquery columns, also link to the underlying physical tables 3538 // When sourceTable is a subquery (e.g., qualified column S.id from MERGE USING subquery), 3539 // TGetTableColumn needs the column to be linked to physical tables for output. 3540 // Use getFinalTable() to trace through to the ultimate physical table. 3541 // IMPORTANT: Only link if a column with the same name doesn't already exist - 3542 // this avoids duplicates when both outer and inner queries reference the same column. 3543 // EXCEPTION: Skip MERGE ON clause columns - they should not be linked to the source 3544 // subquery's underlying table because they may belong to the target table instead. 3545 if (sourceTable.getTableType() == ETableSource.subquery) { 3546 // Skip UNQUALIFIED join condition columns - they should not be traced to the source 3547 // subquery's underlying table via star column expansion. 3548 // This is particularly important for MERGE ON clause columns which may 3549 // belong to the target table rather than the source subquery. 
3550 // QUALIFIED columns (like S.id) should still be traced as they explicitly reference 3551 // the source subquery. 3552 // Note: We check location only because ownStmt may be null for unresolved columns. 3553 boolean isUnqualifiedJoinConditionColumn = (column.getLocation() == ESqlClause.joinCondition) 3554 && (column.getTableString() == null || column.getTableString().isEmpty()); 3555 if (isUnqualifiedJoinConditionColumn && TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3556 logInfo("syncColumnToLegacy: Skipping unqualified join condition column " + column.toString() + 3557 " - should not be traced to subquery's underlying table"); 3558 } 3559 3560 // Skip alias columns - the alias name doesn't exist in the physical table, 3561 // so linking an alias-named column to the physical table produces wrong output 3562 // (e.g., TestTableEmployee.name instead of TestTableEmployee.ename). 3563 // getFinalTable() traces through aliases to find the physical table, but the 3564 // column name is still the alias. Only non-alias columns should be linked. 3565 boolean isAliasColumnForLinking = source.isColumnAlias() || source.getFinalColumnName() != null; 3566 3567 if (!isUnqualifiedJoinConditionColumn && !isAliasColumnForLinking) { 3568 TTable finalTable = source.getFinalTable(); 3569 if (finalTable != null && finalTable != sourceTable && 3570 finalTable.getTableType() != ETableSource.subquery) { 3571 gudusoft.gsqlparser.nodes.TObjectNameList finalLinkedColumns = finalTable.getLinkedColumns(); 3572 if (finalLinkedColumns != null && !containsColumn(finalLinkedColumns, column) 3573 && !containsColumnByName(finalLinkedColumns, column.getColumnNameOnly())) { 3574 finalLinkedColumns.addObjectName(column); 3575 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3576 logInfo("syncColumnToLegacy: Also linked " + column.toString() + 3577 " to underlying physical table " + finalTable.getTableName()); 3578 } 3579 } 3580 } 3581 } 3582 } 3583 3584 } 3585 3586 // 3. 
Sync linkedColumnDef and sourceColumn from ColumnSource 3587 if (source != null) { 3588 Object defNode = source.getDefinitionNode(); 3589 3590 // Set linkedColumnDef if definition is a TColumnDefinition 3591 if (defNode instanceof gudusoft.gsqlparser.nodes.TColumnDefinition) { 3592 column.setLinkedColumnDef((gudusoft.gsqlparser.nodes.TColumnDefinition) defNode); 3593 } 3594 3595 // Set sourceColumn if definition is a TResultColumn 3596 // BUT skip for CTE explicit columns - these reference the CTE column name (e.g., "mgr_dept") 3597 // not the underlying SELECT column (e.g., "grp"). The CTE column is a TObjectName, 3598 // not a TResultColumn, so we cannot set it as sourceColumn. 3599 if (defNode instanceof TResultColumn) { 3600 String evidence = source.getEvidence(); 3601 boolean isCTEExplicitColumn = evidence != null && evidence.startsWith("cte_explicit_column"); 3602 if (!isCTEExplicitColumn) { 3603 column.setSourceColumn((TResultColumn) defNode); 3604 } 3605 } 3606 // Special case: for star-inferred columns, set sourceColumn to the star column 3607 // The definitionNode is intentionally null to avoid affecting formatter output, 3608 // but we still need to set sourceColumn for legacy API compatibility. 3609 // Use setSourceColumnOnly to avoid changing dbObjectType which affects filtering. 3610 else if (defNode == null && source.getEvidence() != null 3611 && source.getEvidence().contains("auto_inferred")) { 3612 // This is a star-inferred column - get the star column from the namespace 3613 INamespace namespace = source.getSourceNamespace(); 3614 if (namespace != null) { 3615 TResultColumn starColumn = namespace.getStarColumn(); 3616 if (starColumn != null) { 3617 column.setSourceColumnOnly(starColumn); 3618 } 3619 } 3620 } 3621 } 3622 3623 return true; 3624 } 3625 3626 /** 3627 * Check if a column already exists in the list (by identity). 
3628 */ 3629 private boolean containsColumn(gudusoft.gsqlparser.nodes.TObjectNameList list, TObjectName column) { 3630 for (int i = 0; i < list.size(); i++) { 3631 if (list.getObjectName(i) == column) { 3632 return true; 3633 } 3634 } 3635 return false; 3636 } 3637 3638 /** 3639 * Check if a column with the given name already exists in the list. 3640 * Used for struct-field access where we create synthetic columns. 3641 */ 3642 private boolean containsColumnByName(gudusoft.gsqlparser.nodes.TObjectNameList list, String columnName) { 3643 if (columnName == null) return false; 3644 // Normalize by stripping quotes for comparison 3645 String normalizedName = stripQuotes(columnName); 3646 for (int i = 0; i < list.size(); i++) { 3647 TObjectName col = list.getObjectName(i); 3648 if (col != null) { 3649 String existingName = stripQuotes(col.getColumnNameOnly()); 3650 if (normalizedName.equalsIgnoreCase(existingName)) { 3651 return true; 3652 } 3653 } 3654 } 3655 return false; 3656 } 3657 3658 /** 3659 * Strip leading/trailing quote characters from a string. 3660 */ 3661 private String stripQuotes(String s) { 3662 if (s == null) return null; 3663 if (s.length() >= 2) { 3664 char first = s.charAt(0); 3665 char last = s.charAt(s.length() - 1); 3666 if ((first == '"' && last == '"') || 3667 (first == '\'' && last == '\'') || 3668 (first == '`' && last == '`') || 3669 (first == '[' && last == ']')) { 3670 return s.substring(1, s.length() - 1); 3671 } 3672 } 3673 return s; 3674 } 3675 3676 /** 3677 * Check if a subquery SELECT statement has an explicit (non-star) column with the given name. 
3678 * This is used to determine whether to create traced column clones: 3679 * - If the column matches an explicit column in the subquery, don't clone (stays at subquery level) 3680 * - If the column doesn't match explicit columns (must come from star), clone to physical table 3681 * 3682 * @param subquery the SELECT statement to check 3683 * @param columnName the column name to look for (may have quotes) 3684 * @return true if the subquery has an explicit column matching the name 3685 */ 3686 private boolean subqueryHasExplicitColumn(TSelectSqlStatement subquery, String columnName) { 3687 if (subquery == null || columnName == null) { 3688 return false; 3689 } 3690 3691 // For combined queries (UNION/INTERSECT/EXCEPT), follow left chain iteratively 3692 TSelectSqlStatement current = subquery; 3693 while (current.isCombinedQuery()) { 3694 current = current.getLeftStmt(); 3695 if (current == null) { 3696 return false; 3697 } 3698 } 3699 subquery = current; 3700 3701 TResultColumnList resultColumns = subquery.getResultColumnList(); 3702 if (resultColumns == null) { 3703 return false; 3704 } 3705 3706 // Normalize the column name for comparison (strip quotes) 3707 String normalizedName = stripQuotes(columnName); 3708 3709 for (int i = 0; i < resultColumns.size(); i++) { 3710 TResultColumn rc = resultColumns.getResultColumn(i); 3711 if (rc == null) { 3712 continue; 3713 } 3714 3715 String colStr = rc.toString(); 3716 // Skip star columns - they're not explicit columns 3717 if (colStr != null && (colStr.equals("*") || colStr.endsWith(".*"))) { 3718 continue; 3719 } 3720 3721 // Get the effective column name (alias if present, otherwise the column name) 3722 String effectiveName = null; 3723 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 3724 effectiveName = rc.getAliasClause().getAliasName().toString(); 3725 } else if (rc.getExpr() != null && rc.getExpr().getObjectOperand() != null) { 3726 // For simple column references like "t1.COL1", 
get the column name 3727 effectiveName = rc.getExpr().getObjectOperand().getColumnNameOnly(); 3728 } 3729 3730 if (effectiveName != null) { 3731 String normalizedEffective = stripQuotes(effectiveName); 3732 if (normalizedName.equalsIgnoreCase(normalizedEffective)) { 3733 return true; 3734 } 3735 } 3736 } 3737 3738 return false; 3739 } 3740 3741 /** 3742 * Expand star columns using push-down inferred columns from namespaces. 3743 * 3744 * This is the core of the star column push-down algorithm: 3745 * 1. Find all star columns in SELECT lists 3746 * 2. For each star column, find its source namespace(s) 3747 * 3. Get inferred columns from the namespace (collected during resolution) 3748 * 4. Expand the star column by populating attributeNodesDerivedFromFromClause 3749 * 3750 * This enables star column expansion without TSQLEnv metadata by using 3751 * columns referenced in outer queries to infer what the star expands to. 3752 */ 3753 private void expandStarColumnsUsingPushDown() { 3754 int expandedCount = 0; 3755 Set<TCustomSqlStatement> processedStmts = new HashSet<>(); 3756 3757 // Track expanded star columns by their string representation for syncing 3758 Map<String, ArrayList<TAttributeNode>> expandedStarCols = new HashMap<>(); 3759 3760 // Process all statements recursively 3761 for (int i = 0; i < sqlStatements.size(); i++) { 3762 expandedCount += expandStarColumnsInStatement(sqlStatements.get(i), processedStmts, expandedStarCols); 3763 } 3764 3765 // Sync expanded attributes to column references in getAllColumnReferences() 3766 // The result column TObjectNames might be different instances than those collected 3767 // during scope building, so we need to copy the expanded attrs 3768 if (scopeBuildResult != null && !expandedStarCols.isEmpty()) { 3769 for (TObjectName colRef : scopeBuildResult.getAllColumnReferences()) { 3770 if (colRef == null) continue; 3771 String colStr = colRef.toString(); 3772 if (colStr == null || !colStr.endsWith("*")) continue; 3773 3774 
// Skip if already has expanded attrs 3775 ArrayList<TAttributeNode> existingAttrs = colRef.getAttributeNodesDerivedFromFromClause(); 3776 if (existingAttrs != null && !existingAttrs.isEmpty()) continue; 3777 3778 // Find matching expanded star column 3779 ArrayList<TAttributeNode> expandedAttrs = expandedStarCols.get(colStr); 3780 if (expandedAttrs != null && !expandedAttrs.isEmpty()) { 3781 // Copy the expanded attrs to this column reference 3782 for (TAttributeNode attr : expandedAttrs) { 3783 TAttributeNode.addNodeToList(attr, existingAttrs); 3784 } 3785 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3786 logInfo("Synced " + expandedAttrs.size() + " expanded attrs to column reference: " + colStr); 3787 } 3788 } 3789 } 3790 } 3791 3792 logInfo("Expanded star columns using push-down: " + expandedCount + " columns added"); 3793 } 3794 3795 /** 3796 * Recursively expand star columns in a statement and its nested statements. 3797 * Uses processedStmts to track ALL statements (not just SELECTs) to prevent infinite loops. 
3798 */ 3799 private int expandStarColumnsInStatement(TCustomSqlStatement stmt, Set<TCustomSqlStatement> processedStmts, 3800 Map<String, ArrayList<TAttributeNode>> expandedStarCols) { 3801 if (stmt == null) return 0; 3802 3803 // Cycle detection: skip if already processed this statement 3804 if (processedStmts.contains(stmt)) { 3805 return 0; 3806 } 3807 processedStmts.add(stmt); 3808 3809 int count = 0; 3810 3811 // Handle SELECT statements 3812 if (stmt instanceof TSelectSqlStatement) { 3813 TSelectSqlStatement select = (TSelectSqlStatement) stmt; 3814 count += expandStarColumnsInSelect(select, expandedStarCols); 3815 3816 // Handle UNION/INTERSECT/EXCEPT - iteratively collect all branches 3817 if (select.isCombinedQuery()) { 3818 Deque<TSelectSqlStatement> unionStack = new ArrayDeque<>(); 3819 if (select.getLeftStmt() != null) unionStack.push(select.getLeftStmt()); 3820 if (select.getRightStmt() != null) unionStack.push(select.getRightStmt()); 3821 while (!unionStack.isEmpty()) { 3822 TSelectSqlStatement branch = unionStack.pop(); 3823 if (branch == null || processedStmts.contains(branch)) continue; 3824 processedStmts.add(branch); 3825 count += expandStarColumnsInSelect(branch, expandedStarCols); 3826 if (branch.isCombinedQuery()) { 3827 if (branch.getLeftStmt() != null) unionStack.push(branch.getLeftStmt()); 3828 if (branch.getRightStmt() != null) unionStack.push(branch.getRightStmt()); 3829 } else { 3830 // Process tables with subqueries in this branch 3831 if (branch.tables != null) { 3832 for (int i = 0; i < branch.tables.size(); i++) { 3833 TTable table = branch.tables.getTable(i); 3834 if (table != null && table.getSubquery() != null) { 3835 count += expandStarColumnsInStatement(table.getSubquery(), processedStmts, expandedStarCols); 3836 } 3837 } 3838 } 3839 if (branch.getCteList() != null) { 3840 for (int i = 0; i < branch.getCteList().size(); i++) { 3841 TCTE cte = branch.getCteList().getCTE(i); 3842 if (cte != null && cte.getSubquery() != null) { 
3843 count += expandStarColumnsInStatement(cte.getSubquery(), processedStmts, expandedStarCols); 3844 } 3845 } 3846 } 3847 } 3848 } 3849 } 3850 } 3851 3852 // Handle MERGE statements specially - process the USING clause 3853 if (stmt instanceof gudusoft.gsqlparser.stmt.TMergeSqlStatement) { 3854 gudusoft.gsqlparser.stmt.TMergeSqlStatement merge = (gudusoft.gsqlparser.stmt.TMergeSqlStatement) stmt; 3855 TTable usingTable = merge.getUsingTable(); 3856 if (usingTable != null && usingTable.getSubquery() != null) { 3857 count += expandStarColumnsInStatement(usingTable.getSubquery(), processedStmts, expandedStarCols); 3858 } 3859 } 3860 3861 // Process nested statements 3862 if (stmt.getStatements() != null) { 3863 for (int i = 0; i < stmt.getStatements().size(); i++) { 3864 Object nested = stmt.getStatements().get(i); 3865 if (nested instanceof TCustomSqlStatement) { 3866 count += expandStarColumnsInStatement((TCustomSqlStatement) nested, processedStmts, expandedStarCols); 3867 } 3868 } 3869 } 3870 3871 // Process tables with subqueries 3872 if (stmt.tables != null) { 3873 for (int i = 0; i < stmt.tables.size(); i++) { 3874 TTable table = stmt.tables.getTable(i); 3875 if (table != null && table.getSubquery() != null) { 3876 count += expandStarColumnsInStatement(table.getSubquery(), processedStmts, expandedStarCols); 3877 } 3878 } 3879 } 3880 3881 // Process CTEs 3882 if (stmt.getCteList() != null) { 3883 for (int i = 0; i < stmt.getCteList().size(); i++) { 3884 TCTE cte = stmt.getCteList().getCTE(i); 3885 if (cte != null && cte.getSubquery() != null) { 3886 count += expandStarColumnsInStatement(cte.getSubquery(), processedStmts, expandedStarCols); 3887 } 3888 } 3889 } 3890 3891 return count; 3892 } 3893 3894 /** 3895 * Expand star columns in a SELECT statement's result column list. 
3896 */ 3897 private int expandStarColumnsInSelect(TSelectSqlStatement select, Map<String, ArrayList<TAttributeNode>> expandedStarCols) { 3898 if (select == null || select.getResultColumnList() == null) return 0; 3899 3900 int count = 0; 3901 TResultColumnList resultCols = select.getResultColumnList(); 3902 3903 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3904 logInfo("expandStarColumnsInSelect: Processing SELECT with " + resultCols.size() + " result columns"); 3905 } 3906 3907 for (int i = 0; i < resultCols.size(); i++) { 3908 TResultColumn rc = resultCols.getResultColumn(i); 3909 if (rc == null || rc.getExpr() == null) continue; 3910 3911 TObjectName objName = rc.getExpr().getObjectOperand(); 3912 if (objName == null) continue; 3913 3914 String colStr = objName.toString(); 3915 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && colStr != null) { 3916 logInfo("expandStarColumnsInSelect: Column " + i + ": " + colStr); 3917 } 3918 if (colStr == null || !colStr.endsWith("*")) continue; 3919 3920 // This is a star column - expand it 3921 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3922 logInfo("expandStarColumnsInSelect: Found star column: " + colStr); 3923 } 3924 count += expandSingleStarColumn(objName, select, colStr, rc); 3925 3926 // Track the expanded attrs for syncing to column references 3927 ArrayList<TAttributeNode> attrList = objName.getAttributeNodesDerivedFromFromClause(); 3928 if (attrList != null && !attrList.isEmpty()) { 3929 expandedStarCols.put(colStr, attrList); 3930 } 3931 } 3932 3933 return count; 3934 } 3935 3936 /** 3937 * Expand a single star column using push-down inferred columns. 
3938 * 3939 * @param starColumn The star column TObjectName (e.g., "*" or "src.*") 3940 * @param select The containing SELECT statement 3941 * @param colStr The string representation of the star column 3942 * @param resultColumn The TResultColumn containing the star (for EXCEPT column list) 3943 * @return Number of columns added 3944 */ 3945 private int expandSingleStarColumn(TObjectName starColumn, TSelectSqlStatement select, String colStr, TResultColumn resultColumn) { 3946 ArrayList<TAttributeNode> attrList = starColumn.getAttributeNodesDerivedFromFromClause(); 3947 3948 // Skip if already expanded 3949 if (!attrList.isEmpty()) { 3950 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3951 logInfo("expandSingleStarColumn: " + colStr + " already expanded with " + attrList.size() + " attrs"); 3952 } 3953 return 0; 3954 } 3955 3956 // Collect EXCEPT column names to exclude from expansion 3957 // (BigQuery: SELECT * EXCEPT (col1, col2) FROM ...) 3958 Set<String> exceptColumns = new HashSet<>(); 3959 if (resultColumn != null) { 3960 TObjectNameList exceptList = resultColumn.getExceptColumnList(); 3961 if (exceptList != null && exceptList.size() > 0) { 3962 for (int i = 0; i < exceptList.size(); i++) { 3963 TObjectName exceptCol = exceptList.getObjectName(i); 3964 if (exceptCol != null) { 3965 String exceptName = exceptCol.getColumnNameOnly(); 3966 if (exceptName != null && !exceptName.isEmpty()) { 3967 exceptColumns.add(exceptName.toUpperCase()); 3968 } 3969 } 3970 } 3971 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3972 logInfo("expandSingleStarColumn: Found " + exceptColumns.size() + 3973 " EXCEPT columns: " + exceptColumns); 3974 } 3975 } 3976 } 3977 3978 int count = 0; 3979 boolean isQualified = colStr.contains(".") && !colStr.equals("*"); 3980 3981 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3982 logInfo("expandSingleStarColumn: " + colStr + " isQualified=" + isQualified); 3983 } 3984 3985 if (isQualified) { 3986 // Qualified star (e.g., "src.*") - find the 
specific table/namespace 3987 String tablePrefix = colStr.substring(0, colStr.lastIndexOf('.')); 3988 count += expandQualifiedStar(starColumn, select, tablePrefix, attrList, exceptColumns); 3989 } else { 3990 // Unqualified star (*) - expand from all tables in FROM clause 3991 count += expandUnqualifiedStar(starColumn, select, attrList, exceptColumns); 3992 } 3993 3994 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3995 logInfo("expandSingleStarColumn: " + colStr + " expanded to " + count + " columns"); 3996 } 3997 3998 return count; 3999 } 4000 4001 /** 4002 * Expand a qualified star column (e.g., "src.*") using namespace inferred columns. 4003 * 4004 * @param starColumn The star column TObjectName 4005 * @param select The containing SELECT statement 4006 * @param tablePrefix The table prefix (e.g., "src" from "src.*") 4007 * @param attrList The list to add expanded attributes to 4008 * @param exceptColumns Column names to exclude (from EXCEPT clause), uppercase 4009 */ 4010 private int expandQualifiedStar(TObjectName starColumn, TSelectSqlStatement select, 4011 String tablePrefix, ArrayList<TAttributeNode> attrList, 4012 Set<String> exceptColumns) { 4013 int count = 0; 4014 4015 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4016 logInfo("expandQualifiedStar: tablePrefix=" + tablePrefix + 4017 ", exceptColumns=" + (exceptColumns != null ? exceptColumns : "none")); 4018 } 4019 4020 // Find the source table by alias or name 4021 TTable sourceTable = findTableByPrefixInSelect(select, tablePrefix); 4022 if (sourceTable == null) { 4023 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4024 logInfo("expandQualifiedStar: No source table found for " + tablePrefix); 4025 } 4026 // Fall back to just adding the qualified star attribute 4027 TAttributeNode.addNodeToList( 4028 new TAttributeNode(tablePrefix + ".*", null), 4029 attrList 4030 ); 4031 return 0; 4032 } 4033 4034 // Collect inferred columns from multiple sources: 4035 // 1. 
The table's own namespace (TableNamespace) 4036 // 2. If the SELECT is a CTE definition, the CTE's namespace 4037 // 3. If the SELECT is a subquery, the containing scope's namespace 4038 Set<String> allInferredCols = new HashSet<>(); 4039 4040 // Source 1: Get namespace for this table 4041 INamespace tableNamespace = scopeBuildResult != null 4042 ? scopeBuildResult.getNamespaceForTable(sourceTable) 4043 : null; 4044 4045 if (tableNamespace != null) { 4046 Set<String> inferredCols = tableNamespace.getInferredColumns(); 4047 if (inferredCols != null) { 4048 allInferredCols.addAll(inferredCols); 4049 } 4050 } 4051 4052 // Source 2: Check if this SELECT is part of a CTE definition 4053 // If so, the CTE namespace may have inferred columns from outer queries 4054 Set<String> cteInferredCols = getInferredColumnsFromContainingCTE(select); 4055 if (cteInferredCols != null) { 4056 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4057 logInfo("expandQualifiedStar: Adding " + cteInferredCols.size() + 4058 " CTE inferred columns for " + tablePrefix); 4059 } 4060 allInferredCols.addAll(cteInferredCols); 4061 } 4062 4063 // Source 3: Check the SELECT's output scope for inferred columns 4064 // IMPORTANT: For qualified star columns (like ta.*), only use scope-level inferred columns 4065 // if they actually exist in this table's namespace. Otherwise we'd incorrectly add columns 4066 // from other tables in the FROM clause to this star's expanded attributes. 4067 IScope selectScope = scopeBuildResult != null 4068 ? 
scopeBuildResult.getScopeForStatement(select) 4069 : null; 4070 if (selectScope != null) { 4071 Set<String> scopeInferredCols = getInferredColumnsFromScope(selectScope); 4072 if (scopeInferredCols != null && tableNamespace != null) { 4073 // Only add scope-level inferred columns that actually exist in this table's namespace 4074 // This prevents columns from other tables being incorrectly associated with this star 4075 Map<String, ColumnSource> columnSources = tableNamespace.getAllColumnSources(); 4076 Set<String> tableInferredCols = tableNamespace.getInferredColumns(); 4077 for (String scopeCol : scopeInferredCols) { 4078 // Check if this column can be resolved within this table's namespace 4079 boolean hasInNamespace = (columnSources != null && columnSources.containsKey(scopeCol)) || 4080 (tableInferredCols != null && tableInferredCols.contains(scopeCol)); 4081 if (hasInNamespace) { 4082 allInferredCols.add(scopeCol); 4083 } 4084 } 4085 } else if (scopeInferredCols != null && tableNamespace == null) { 4086 // No table namespace - add all scope columns (fallback for edge cases) 4087 allInferredCols.addAll(scopeInferredCols); 4088 } 4089 } 4090 4091 if (!allInferredCols.isEmpty()) { 4092 // Expand using inferred columns, filtering out EXCEPT columns 4093 for (String colName : allInferredCols) { 4094 // Skip columns in EXCEPT clause 4095 if (exceptColumns != null && !exceptColumns.isEmpty() && 4096 exceptColumns.contains(colName.toUpperCase())) { 4097 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4098 logInfo("expandQualifiedStar: Skipping EXCEPT column: " + colName); 4099 } 4100 continue; 4101 } 4102 String attrName = tablePrefix + "." 
+ colName; 4103 TAttributeNode.addNodeToList( 4104 new TAttributeNode(attrName, sourceTable), 4105 attrList 4106 ); 4107 count++; 4108 } 4109 } else if (tableNamespace != null) { 4110 // No inferred columns - try to get from namespace's column sources 4111 Map<String, ColumnSource> columnSources = tableNamespace.getAllColumnSources(); 4112 if (columnSources != null && !columnSources.isEmpty()) { 4113 for (String colName : columnSources.keySet()) { 4114 // Skip columns in EXCEPT clause 4115 if (exceptColumns != null && !exceptColumns.isEmpty() && 4116 exceptColumns.contains(colName.toUpperCase())) { 4117 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4118 logInfo("expandQualifiedStar: Skipping EXCEPT column from sources: " + colName); 4119 } 4120 continue; 4121 } 4122 String attrName = tablePrefix + "." + colName; 4123 TAttributeNode.addNodeToList( 4124 new TAttributeNode(attrName, sourceTable), 4125 attrList 4126 ); 4127 count++; 4128 } 4129 } 4130 } 4131 4132 // If no columns were added, add the star as fallback 4133 if (count == 0) { 4134 TAttributeNode.addNodeToList( 4135 new TAttributeNode(tablePrefix + ".*", sourceTable), 4136 attrList 4137 ); 4138 } 4139 4140 return count; 4141 } 4142 4143 /** 4144 * Get inferred columns from a CTE that contains the given SELECT statement. 4145 * Used for push-down: when outer queries reference columns from a CTE, 4146 * those columns are inferred in the CTE's namespace and should be used 4147 * to expand star columns in the CTE's SELECT. 
4148 */ 4149 private Set<String> getInferredColumnsFromContainingCTE(TSelectSqlStatement select) { 4150 if (select == null || scopeBuildResult == null || namespaceEnhancer == null) { 4151 return null; 4152 } 4153 4154 // Find the CTE that defines this SELECT 4155 Set<INamespace> starNamespaces = namespaceEnhancer.getStarNamespaces(); 4156 if (starNamespaces == null) { 4157 return null; 4158 } 4159 4160 for (INamespace ns : starNamespaces) { 4161 if (ns instanceof CTENamespace) { 4162 CTENamespace cteNs = (CTENamespace) ns; 4163 TSelectSqlStatement cteSelect = cteNs.getSelectStatement(); 4164 // Check both by reference and by start token position 4165 if (cteSelect == select || 4166 (cteSelect != null && select != null && 4167 cteSelect.getStartToken() != null && select.getStartToken() != null && 4168 cteSelect.getStartToken().posinlist == select.getStartToken().posinlist)) { 4169 Set<String> inferredCols = cteNs.getInferredColumns(); 4170 if (inferredCols != null && !inferredCols.isEmpty()) { 4171 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4172 logInfo("getInferredColumnsFromContainingCTE: Found CTE " + cteNs.getDisplayName() + 4173 " with " + inferredCols.size() + " inferred columns"); 4174 } 4175 return inferredCols; 4176 } 4177 } 4178 } else if (ns instanceof SubqueryNamespace) { 4179 SubqueryNamespace subNs = (SubqueryNamespace) ns; 4180 TSelectSqlStatement subSelect = subNs.getSelectStatement(); 4181 if (subSelect == select || 4182 (subSelect != null && select != null && 4183 subSelect.getStartToken() != null && select.getStartToken() != null && 4184 subSelect.getStartToken().posinlist == select.getStartToken().posinlist)) { 4185 Set<String> inferredCols = subNs.getInferredColumns(); 4186 if (inferredCols != null && !inferredCols.isEmpty()) { 4187 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4188 logInfo("getInferredColumnsFromContainingCTE: Found Subquery with " + 4189 inferredCols.size() + " inferred columns"); 4190 } 4191 return inferredCols; 4192 } 
4193 } 4194 } 4195 } 4196 4197 return null; 4198 } 4199 4200 /** 4201 * Get inferred columns from namespaces in a scope's FROM clause. 4202 */ 4203 private Set<String> getInferredColumnsFromScope(IScope scope) { 4204 if (scope == null) { 4205 return null; 4206 } 4207 4208 Set<String> result = new HashSet<>(); 4209 4210 // Check all namespaces in the scope's children 4211 for (gudusoft.gsqlparser.resolver2.model.ScopeChild child : scope.getChildren()) { 4212 INamespace ns = child.getNamespace(); 4213 if (ns != null) { 4214 Set<String> inferredCols = ns.getInferredColumns(); 4215 if (inferredCols != null) { 4216 result.addAll(inferredCols); 4217 } 4218 } 4219 } 4220 4221 return result.isEmpty() ? null : result; 4222 } 4223 4224 /** 4225 * Expand an unqualified star column (*) using all tables in FROM clause. 4226 * 4227 * @param starColumn The star column TObjectName 4228 * @param select The containing SELECT statement 4229 * @param attrList The list to add expanded attributes to 4230 * @param exceptColumns Column names to exclude (from EXCEPT clause), uppercase 4231 */ 4232 private int expandUnqualifiedStar(TObjectName starColumn, TSelectSqlStatement select, 4233 ArrayList<TAttributeNode> attrList, Set<String> exceptColumns) { 4234 int count = 0; 4235 4236 if (select.tables == null) return 0; 4237 4238 for (int i = 0; i < select.tables.size(); i++) { 4239 TTable table = select.tables.getTable(i); 4240 if (table == null) continue; 4241 4242 // Skip certain table types 4243 if (table.getTableType() == ETableSource.join) continue; 4244 4245 String tablePrefix = table.getAliasName(); 4246 if (tablePrefix == null || tablePrefix.isEmpty()) { 4247 tablePrefix = table.getName(); 4248 } 4249 if (tablePrefix == null) continue; 4250 4251 // Get namespace for this table 4252 INamespace namespace = scopeBuildResult != null 4253 ? 
scopeBuildResult.getNamespaceForTable(table) 4254 : null; 4255 4256 if (namespace != null) { 4257 Set<String> inferredCols = namespace.getInferredColumns(); 4258 4259 if (inferredCols != null && !inferredCols.isEmpty()) { 4260 for (String colName : inferredCols) { 4261 // Skip columns in EXCEPT clause 4262 if (exceptColumns != null && !exceptColumns.isEmpty() && 4263 exceptColumns.contains(colName.toUpperCase())) { 4264 continue; 4265 } 4266 String attrName = tablePrefix + "." + colName; 4267 TAttributeNode.addNodeToList( 4268 new TAttributeNode(attrName, table), 4269 attrList 4270 ); 4271 count++; 4272 } 4273 } else { 4274 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 4275 if (columnSources != null && !columnSources.isEmpty()) { 4276 for (String colName : columnSources.keySet()) { 4277 // Skip columns in EXCEPT clause 4278 if (exceptColumns != null && !exceptColumns.isEmpty() && 4279 exceptColumns.contains(colName.toUpperCase())) { 4280 continue; 4281 } 4282 String attrName = tablePrefix + "." + colName; 4283 TAttributeNode.addNodeToList( 4284 new TAttributeNode(attrName, table), 4285 attrList 4286 ); 4287 count++; 4288 } 4289 } 4290 } 4291 } 4292 4293 // If no columns for this table, add the star as fallback 4294 if (count == 0 || (namespace != null && namespace.getInferredColumns().isEmpty() 4295 && namespace.getAllColumnSources().isEmpty())) { 4296 TAttributeNode.addNodeToList( 4297 new TAttributeNode(tablePrefix + ".*", table), 4298 attrList 4299 ); 4300 } 4301 } 4302 4303 return count; 4304 } 4305 4306 /** 4307 * Find a table by its prefix (alias or name) in a SELECT statement. 
4308 */ 4309 private TTable findTableByPrefixInSelect(TSelectSqlStatement select, String prefix) { 4310 if (select == null || select.tables == null || prefix == null) return null; 4311 4312 // Normalize prefix (remove backticks, quotes, schema prefix for comparison) 4313 String normalizedPrefix = normalizeTablePrefix(prefix); 4314 4315 for (int i = 0; i < select.tables.size(); i++) { 4316 TTable table = select.tables.getTable(i); 4317 if (table == null) continue; 4318 4319 // Check alias first 4320 String alias = table.getAliasName(); 4321 if (alias != null && normalizeTablePrefix(alias).equalsIgnoreCase(normalizedPrefix)) { 4322 return table; 4323 } 4324 4325 // Check table name 4326 String name = table.getName(); 4327 if (name != null && normalizeTablePrefix(name).equalsIgnoreCase(normalizedPrefix)) { 4328 return table; 4329 } 4330 4331 // Check full table name (with schema) 4332 if (table.getTableName() != null) { 4333 String fullName = table.getTableName().toString(); 4334 if (fullName != null && normalizeTablePrefix(fullName).equalsIgnoreCase(normalizedPrefix)) { 4335 return table; 4336 } 4337 } 4338 } 4339 4340 return null; 4341 } 4342 4343 /** 4344 * Normalize table prefix for comparison (remove quotes, backticks). 
4345 */ 4346 private String normalizeTablePrefix(String prefix) { 4347 if (prefix == null) return ""; 4348 String result = prefix.trim(); 4349 // Remove backticks 4350 if (result.startsWith("`") && result.endsWith("`")) { 4351 result = result.substring(1, result.length() - 1); 4352 } 4353 // Remove double quotes 4354 if (result.startsWith("\"") && result.endsWith("\"")) { 4355 result = result.substring(1, result.length() - 1); 4356 } 4357 // Remove brackets 4358 if (result.startsWith("[") && result.endsWith("]")) { 4359 result = result.substring(1, result.length() - 1); 4360 } 4361 return result; 4362 } 4363 4364 /** 4365 * Get resolution statistics 4366 */ 4367 public ResolutionStatistics getStatistics() { 4368 return resolutionContext.getStatistics(); 4369 } 4370 4371 /** 4372 * Get the resolution context (for advanced queries) 4373 */ 4374 public ResolutionContext getContext() { 4375 return resolutionContext; 4376 } 4377 4378 /** 4379 * Get the global scope 4380 */ 4381 public GlobalScope getGlobalScope() { 4382 return globalScope; 4383 } 4384 4385 /** 4386 * Get the configuration 4387 */ 4388 public TSQLResolverConfig getConfig() { 4389 return config; 4390 } 4391 4392 /** 4393 * Get the pass history (for iterative resolution analysis) 4394 * 4395 * @return list of all resolution passes (empty if non-iterative or not yet resolved) 4396 */ 4397 public List<ResolutionPass> getPassHistory() { 4398 return new ArrayList<>(passHistory); 4399 } 4400 4401 /** 4402 * Get the convergence detector (for iterative resolution analysis) 4403 * 4404 * @return convergence detector (null if iterative resolution is disabled) 4405 */ 4406 public ConvergenceDetector getConvergenceDetector() { 4407 return convergenceDetector; 4408 } 4409 4410 /** 4411 * Get the scope build result (for testing and analysis) 4412 * 4413 * @return scope build result from ScopeBuilder (null if not yet resolved) 4414 */ 4415 public ScopeBuildResult getScopeBuildResult() { 4416 return scopeBuildResult; 
4417 } 4418 4419 /** 4420 * Get the resolution result access interface. 4421 * This provides a clean, statement-centric API for accessing resolution results. 4422 * 4423 * <p>Usage example:</p> 4424 * <pre> 4425 * TSQLResolver2 resolver = new TSQLResolver2(null, parser.sqlstatements); 4426 * resolver.resolve(); 4427 * 4428 * IResolutionResult result = resolver.getResult(); 4429 * 4430 * for (TCustomSqlStatement stmt : parser.sqlstatements) { 4431 * for (TTable table : result.getTables(stmt)) { 4432 * System.out.println("Table: " + table.getFullName()); 4433 * for (TObjectName col : result.getColumnsForTable(stmt, table)) { 4434 * System.out.println(" Column: " + col.getColumnNameOnly()); 4435 * } 4436 * } 4437 * } 4438 * </pre> 4439 * 4440 * @return resolution result access interface 4441 * @throws IllegalStateException if resolve() has not been called 4442 */ 4443 public IResolutionResult getResult() { 4444 if (scopeBuildResult == null) { 4445 throw new IllegalStateException( 4446 "Must call resolve() before getResult()"); 4447 } 4448 return new ResolutionResultImpl(scopeBuildResult, sqlStatements); 4449 } 4450 4451 // ===== Star Column Reverse Inference Support (Principle 3) ===== 4452 4453 /** 4454 * Star Column push-down context for reverse inference. 4455 * Tracks which columns should be added to which Namespaces based on 4456 * outer layer references. 4457 */ 4458 private static class StarPushDownContext { 4459 /** Namespace -> (ColumnName -> Confidence) */ 4460 private final Map<INamespace, Map<String, Double>> pushDownMap = new HashMap<>(); 4461 4462 /** 4463 * Record that a column should be added to a namespace. 4464 * If the same column is pushed multiple times, keep the highest confidence. 
4465 */ 4466 public void pushColumn(INamespace namespace, String columnName, double confidence) { 4467 Map<String, Double> columns = pushDownMap.computeIfAbsent(namespace, k -> new HashMap<>()); 4468 columns.put(columnName, Math.max(confidence, columns.getOrDefault(columnName, 0.0))); 4469 } 4470 4471 /** 4472 * Get all columns that should be pushed to each namespace. 4473 */ 4474 public Map<INamespace, java.util.Set<String>> getAllPushDownColumns() { 4475 Map<INamespace, java.util.Set<String>> result = new HashMap<>(); 4476 for (Map.Entry<INamespace, Map<String, Double>> entry : pushDownMap.entrySet()) { 4477 result.put(entry.getKey(), entry.getValue().keySet()); 4478 } 4479 return result; 4480 } 4481 4482 /** 4483 * Get the confidence score for a specific column in a namespace. 4484 */ 4485 public double getConfidence(INamespace namespace, String columnName) { 4486 return pushDownMap.getOrDefault(namespace, java.util.Collections.emptyMap()) 4487 .getOrDefault(columnName, 0.0); 4488 } 4489 4490 /** 4491 * Get the total number of columns to be pushed down across all namespaces. 4492 */ 4493 public int getTotalPushedColumns() { 4494 return pushDownMap.values().stream() 4495 .mapToInt(Map::size) 4496 .sum(); 4497 } 4498 } 4499 4500 /** 4501 * Represents a star column source (CTE or subquery with SELECT *). 4502 * Used for reverse inference to track which columns are required from the star. 
4503 */ 4504 private static class StarColumnSource { 4505 private final String name; // CTE name or subquery alias 4506 private final INamespace namespace; // The namespace for this source 4507 private final INamespace underlyingTableNamespace; // Namespace of the table behind SELECT * 4508 private final java.util.Set<String> requiredColumns = new java.util.HashSet<>(); 4509 4510 public StarColumnSource(String name, INamespace namespace, INamespace underlyingTableNamespace) { 4511 this.name = name; 4512 this.namespace = namespace; 4513 this.underlyingTableNamespace = underlyingTableNamespace; 4514 } 4515 4516 public String getName() { 4517 return name; 4518 } 4519 4520 public INamespace getNamespace() { 4521 return namespace; 4522 } 4523 4524 public void addRequiredColumn(String columnName) { 4525 requiredColumns.add(columnName); 4526 } 4527 4528 public java.util.Set<String> getRequiredColumns() { 4529 return requiredColumns; 4530 } 4531 4532 public boolean hasUnderlyingTable() { 4533 return underlyingTableNamespace != null; 4534 } 4535 4536 public INamespace getUnderlyingTableNamespace() { 4537 return underlyingTableNamespace; 4538 } 4539 4540 @Override 4541 public String toString() { 4542 return String.format("StarColumnSource[%s, required=%d]", name, requiredColumns.size()); 4543 } 4544 } 4545 4546 /** 4547 * Collect all star column sources (CTEs and subqueries with SELECT *). 4548 * Traverses the scope tree to find CTENamespace and SubqueryNamespace 4549 * that use SELECT * in their subqueries. 
4550 */ 4551 private List<StarColumnSource> collectAllStarColumnSources() { 4552 List<StarColumnSource> sources = new ArrayList<>(); 4553 4554 // Traverse global scope tree 4555 if (globalScope != null) { 4556 collectStarSourcesFromScope(globalScope, sources); 4557 } 4558 4559 // Also traverse UPDATE scopes (for Teradata UPDATE...FROM syntax) 4560 if (scopeBuilder != null) { 4561 for (UpdateScope updateScope : scopeBuilder.getUpdateScopeMap().values()) { 4562 collectStarSourcesFromScope(updateScope, sources); 4563 } 4564 for (DeleteScope deleteScope : scopeBuilder.getDeleteScopeMap().values()) { 4565 collectStarSourcesFromScope(deleteScope, sources); 4566 } 4567 } 4568 4569 logDebug("Collected " + sources.size() + " star column sources"); 4570 return sources; 4571 } 4572 4573 /** 4574 * Recursively collect star column sources from a scope and its children. 4575 */ 4576 private void collectStarSourcesFromScope(IScope scope, List<StarColumnSource> sources) { 4577 // Check all child namespaces in this scope 4578 for (gudusoft.gsqlparser.resolver2.model.ScopeChild child : scope.getChildren()) { 4579 INamespace namespace = child.getNamespace(); 4580 4581 // Use the new interface method to check for star columns 4582 if (namespace.hasStarColumn()) { 4583 TSelectSqlStatement selectStmt = namespace.getSelectStatement(); 4584 INamespace underlyingNs = selectStmt != null ? 
getFirstTableNamespace(selectStmt) : null; 4585 4586 StarColumnSource starSource = new StarColumnSource( 4587 namespace.getDisplayName(), 4588 namespace, 4589 underlyingNs 4590 ); 4591 sources.add(starSource); 4592 4593 logDebug("Found star source: " + namespace.getDisplayName()); 4594 } 4595 } 4596 4597 // Recursively traverse child scopes based on scope type 4598 if (scope instanceof SelectScope) { 4599 SelectScope selectScope = (SelectScope) scope; 4600 if (selectScope.getFromScope() != null) { 4601 collectStarSourcesFromScope(selectScope.getFromScope(), sources); 4602 } 4603 } else if (scope instanceof UpdateScope) { 4604 UpdateScope updateScope = (UpdateScope) scope; 4605 if (updateScope.getFromScope() != null) { 4606 collectStarSourcesFromScope(updateScope.getFromScope(), sources); 4607 } 4608 } else if (scope instanceof DeleteScope) { 4609 DeleteScope deleteScope = (DeleteScope) scope; 4610 if (deleteScope.getFromScope() != null) { 4611 collectStarSourcesFromScope(deleteScope.getFromScope(), sources); 4612 } 4613 } 4614 } 4615 4616 4617 /** 4618 * Get the first table namespace from a SELECT statement's FROM clause. 4619 * Returns the DynamicStarSource if available. 4620 */ 4621 private INamespace getFirstTableNamespace(TSelectSqlStatement select) { 4622 if (select == null || select.tables == null || select.tables.size() == 0) { 4623 return null; 4624 } 4625 4626 // Get first table 4627 TTable firstTable = select.tables.getTable(0); 4628 String tableName = firstTable.getAliasName() != null 4629 ? firstTable.getAliasName() 4630 : firstTable.getName(); 4631 4632 // Search for corresponding namespace in all dynamic namespaces 4633 List<INamespace> dynamicNamespaces = getAllDynamicNamespaces(); 4634 for (INamespace ns : dynamicNamespaces) { 4635 if (ns.getDisplayName().equals(tableName)) { 4636 return ns; 4637 } 4638 } 4639 4640 return null; 4641 } 4642 4643 /** 4644 * Collect all outer references to a star column source. 
4645 * Searches through allColumnReferences for columns that reference this star source. 4646 */ 4647 private List<TObjectName> collectOuterReferencesToSource(StarColumnSource starSource) { 4648 List<TObjectName> references = new ArrayList<>(); 4649 4650 if (starSource == null || starSource.getName() == null) { 4651 return references; 4652 } 4653 4654 String sourceName = starSource.getName(); 4655 4656 // Search through all collected column references 4657 for (TObjectName objName : allColumnReferences) { 4658 if (objName == null) { 4659 continue; 4660 } 4661 4662 // Check if this column reference is from the star source 4663 // E.g., for CTE named "my_cte", check if objName is like "my_cte.col1" 4664 String tableQualifier = getTableQualifier(objName); 4665 4666 if (tableQualifier != null && tableQualifier.equalsIgnoreCase(sourceName)) { 4667 references.add(objName); 4668 logDebug("Found outer reference: " + objName + " -> " + sourceName); 4669 } 4670 } 4671 4672 logDebug("Collected " + references.size() + " outer references for: " + sourceName); 4673 return references; 4674 } 4675 4676 /** 4677 * Get the table qualifier from a TObjectName. 
4678 * E.g., for "schema.table.column", returns "table" 4679 * E.g., for "table.column", returns "table" 4680 * E.g., for "column", returns null 4681 */ 4682 private String getTableQualifier(TObjectName objName) { 4683 if (objName == null) { 4684 return null; 4685 } 4686 4687 // TObjectName has parts like: [schema, table, column] 4688 // or [table, column] 4689 // or [column] 4690 4691 // If there are 3 or more parts, the second-to-last is the table 4692 // If there are 2 parts, the first is the table 4693 // If there is 1 part, there's no table qualifier 4694 4695 String fullName = objName.toString(); 4696 String[] parts = fullName.split("\\."); 4697 4698 if (parts.length >= 3) { 4699 // schema.table.column -> return table 4700 return parts[parts.length - 2]; 4701 } else if (parts.length == 2) { 4702 // table.column -> return table 4703 return parts[0]; 4704 } else { 4705 // Just column name, no qualifier 4706 return null; 4707 } 4708 } 4709 4710 /** 4711 * Get all DynamicStarSource namespaces from the scope tree. 4712 * This is used to apply inference results to namespaces that need enhancement. 4713 */ 4714 private List<INamespace> getAllDynamicNamespaces() { 4715 List<INamespace> result = new ArrayList<>(); 4716 4717 // Collect from global scope tree 4718 if (globalScope != null) { 4719 collectDynamicNamespacesFromScope(globalScope, result); 4720 } 4721 4722 return result; 4723 } 4724 4725 /** 4726 * Recursively collect DynamicStarSource namespaces from a scope and its children. 
*
     * <p>Only the FROM scope of a {@code SelectScope} is followed. CTE, FROM,
     * GROUP BY, HAVING and ORDER BY scopes contribute only their direct children,
     * and scopes nested inside subquery namespaces are not visited from here.
     *
     * @param scope the scope to inspect; {@code null} is ignored
     * @param result the list that receives the namespaces found
     */
    private void collectDynamicNamespacesFromScope(IScope scope, List<INamespace> result) {
        if (scope == null) {
            return;
        }

        // Get all child namespaces from this scope
        for (gudusoft.gsqlparser.resolver2.model.ScopeChild child : scope.getChildren()) {
            INamespace namespace = child.getNamespace();
            if (namespace instanceof gudusoft.gsqlparser.resolver2.namespace.DynamicStarSource) {
                result.add(namespace);
                logDebug("Found DynamicStarSource: " + namespace.getDisplayName());
            }
        }

        // A SELECT scope is the only scope type with a nested scope to follow here.
        if (scope instanceof SelectScope) {
            SelectScope selectScope = (SelectScope) scope;

            // Traverse FROM scope
            if (selectScope.getFromScope() != null) {
                collectDynamicNamespacesFromScope(selectScope.getFromScope(), result);
            }
        }
    }

    // ===== Logging helpers =====

    /** Logs a message at INFO level with the resolver prefix. */
    private void logInfo(String message) {
        TBaseType.log("[TSQLResolver2] " + message, TLog.INFO);
    }

    /** Logs a message at DEBUG level with the resolver prefix. */
    private void logDebug(String message) {
        TBaseType.log("[TSQLResolver2] " + message, TLog.DEBUG);
    }

    /** Logs a message at ERROR level with the resolver prefix. */
    private void logError(String message) {
        TBaseType.log("[TSQLResolver2] " + message, TLog.ERROR);
    }
}