001package gudusoft.gsqlparser.resolver2; 002 003import gudusoft.gsqlparser.TBaseType; 004import gudusoft.gsqlparser.TCustomSqlStatement; 005import gudusoft.gsqlparser.IRelation; 006import gudusoft.gsqlparser.TLog; 007import gudusoft.gsqlparser.TSourceToken; 008import gudusoft.gsqlparser.TStatementList; 009import gudusoft.gsqlparser.ETableSource; 010import gudusoft.gsqlparser.EDbVendor; 011import gudusoft.gsqlparser.EDbObjectType; 012import gudusoft.gsqlparser.EErrorType; 013import gudusoft.gsqlparser.ESqlClause; 014import gudusoft.gsqlparser.ESqlStatementType; 015import gudusoft.gsqlparser.TSyntaxError; 016import gudusoft.gsqlparser.stmt.dax.TDaxStmt; 017import gudusoft.gsqlparser.stmt.TAlterTableStatement; 018import gudusoft.gsqlparser.stmt.TCreateTableSqlStatement; 019import gudusoft.gsqlparser.stmt.TInsertSqlStatement; 020import gudusoft.gsqlparser.stmt.TUpdateSqlStatement; 021import gudusoft.gsqlparser.stmt.TDeleteSqlStatement; 022import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 023import gudusoft.gsqlparser.compiler.TContext; 024import gudusoft.gsqlparser.nodes.TObjectName; 025import gudusoft.gsqlparser.nodes.TObjectNameList; 026import gudusoft.gsqlparser.nodes.TTable; 027import gudusoft.gsqlparser.nodes.TJoinExpr; 028import gudusoft.gsqlparser.nodes.TParseTreeNode; 029import gudusoft.gsqlparser.nodes.TParseTreeVisitor; 030import gudusoft.gsqlparser.nodes.TResultColumn; 031import gudusoft.gsqlparser.nodes.TResultColumnList; 032import gudusoft.gsqlparser.nodes.TQualifyClause; 033import gudusoft.gsqlparser.nodes.TExpression; 034import gudusoft.gsqlparser.EExpressionType; 035import gudusoft.gsqlparser.resolver2.model.ColumnSource; 036import gudusoft.gsqlparser.resolver2.model.FromScopeIndex; 037import gudusoft.gsqlparser.resolver2.model.ResolutionContext; 038import gudusoft.gsqlparser.resolver2.model.ResolutionResult; 039import gudusoft.gsqlparser.resolver2.model.ResolutionStatistics; 040import gudusoft.gsqlparser.resolver2.ResolutionStatus; 041import 
gudusoft.gsqlparser.resolver2.result.IResolutionResult; 042import gudusoft.gsqlparser.resolver2.result.ResolutionResultImpl; 043import gudusoft.gsqlparser.resolver2.scope.FromScope; 044import gudusoft.gsqlparser.resolver2.scope.GlobalScope; 045import gudusoft.gsqlparser.resolver2.scope.IScope; 046import gudusoft.gsqlparser.resolver2.scope.SelectScope; 047import gudusoft.gsqlparser.resolver2.scope.CTEScope; 048import gudusoft.gsqlparser.resolver2.scope.GroupByScope; 049import gudusoft.gsqlparser.resolver2.scope.HavingScope; 050import gudusoft.gsqlparser.resolver2.scope.OrderByScope; 051import gudusoft.gsqlparser.resolver2.scope.UpdateScope; 052import gudusoft.gsqlparser.resolver2.scope.DeleteScope; 053import gudusoft.gsqlparser.resolver2.namespace.INamespace; 054import gudusoft.gsqlparser.resolver2.namespace.TableNamespace; 055import gudusoft.gsqlparser.resolver2.namespace.SubqueryNamespace; 056import gudusoft.gsqlparser.resolver2.namespace.CTENamespace; 057import gudusoft.gsqlparser.nodes.TCTE; 058import gudusoft.gsqlparser.nodes.TCTEList; 059import gudusoft.gsqlparser.nodes.TUnnestClause; 060import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 061import gudusoft.gsqlparser.resolver2.iterative.ConvergenceDetector; 062import gudusoft.gsqlparser.resolver2.iterative.ResolutionPass; 063import gudusoft.gsqlparser.resolver2.enhancement.NamespaceEnhancer; 064import gudusoft.gsqlparser.resolver2.enhancement.EnhancementResult; 065import gudusoft.gsqlparser.resolver2.enhancement.CollectedColumnRef; 066import gudusoft.gsqlparser.resolver2.metadata.BatchMetadataCollector; 067import gudusoft.gsqlparser.resolver2.context.DatabaseContextTracker; 068import gudusoft.gsqlparser.resolver2.namespace.CTENamespace; 069import gudusoft.gsqlparser.sqlenv.TSQLEnv; 070import gudusoft.gsqlparser.TAttributeNode; 071 072import java.util.ArrayDeque; 073import java.util.ArrayList; 074import java.util.Deque; 075import java.util.HashMap; 076import java.util.HashSet; 077import 
java.util.IdentityHashMap; 078import java.util.List; 079import java.util.Map; 080import java.util.Set; 081 082// ScopeBuilder for visitor-based scope construction 083import gudusoft.gsqlparser.resolver2.ScopeBuilder; 084import gudusoft.gsqlparser.resolver2.ScopeBuildResult; 085 086/** 087 * New SQL Resolver - Phase 2 Enhanced Framework 088 * 089 * This is the main entry point for the new resolution architecture. 090 * Provides improved column-to-table resolution with: 091 * - Clear scope-based name resolution 092 * - Full candidate collection for ambiguous cases 093 * - Confidence-scored inference 094 * - Better tracing and debugging 095 * 096 * Usage: 097 * <pre> 098 * TSQLResolver2 resolver = new TSQLResolver2(context, statements); 099 * boolean success = resolver.resolve(); 100 * ResolutionStatistics stats = resolver.getStatistics(); 101 * </pre> 102 * 103 * Phase 1 capabilities: 104 * - Basic SELECT statement resolution 105 * - Table and subquery namespaces 106 * - Qualified and unqualified column references 107 * - FROM clause scope management 108 * 109 * Phase 2 capabilities: 110 * - JOIN scope handling with nullable semantics 111 * - CTE (WITH clause) resolution 112 * - Iterative resolution framework (auto-converges after first pass if no iteration needed) 113 * 114 * Future phases will add: 115 * - Evidence-based inference 116 * - Star column expansion 117 */ 118public class TSQLResolver2 { 119 120 private final TContext globalContext; 121 private final TStatementList sqlStatements; 122 private final TSQLResolverConfig config; 123 private final ResolutionContext resolutionContext; 124 private final NameResolver nameResolver; 125 126 /** Global scope (root of scope tree) */ 127 private GlobalScope globalScope; 128 129 /** Convergence detector for iterative resolution */ 130 private ConvergenceDetector convergenceDetector; 131 132 /** History of all resolution passes */ 133 private final List<ResolutionPass> passHistory; 134 135 /** 136 * Scope cache for 
iterative resolution. 137 * Maps statements to their scope trees to avoid rebuilding scopes on each pass. 138 * Key: TCustomSqlStatement, Value: SelectScope (or other scope type) 139 */ 140 private final java.util.Map<Object, IScope> statementScopeCache; 141 142 /** 143 * Column-to-Scope mapping for iterative resolution (Principle 1: Scope完全复用). 144 * Built once in Pass 1, reused in Pass 2+ to avoid rebuilding scopes. 145 * Maps each TObjectName (column reference) to the IScope where it should be resolved. 146 */ 147 private final java.util.Map<TObjectName, IScope> columnToScopeMap; 148 149 /** 150 * FromScope index cache for O(1) table/namespace lookups (Performance Optimization B). 151 * Maps FromScope instances to their pre-built indexes. 152 * Built lazily on first access, cleared at the start of each resolve() call. 153 * Uses IdentityHashMap because we need object identity, not equals(). 154 */ 155 private final Map<IScope, FromScopeIndex> fromScopeIndexCache; 156 157 /** 158 * Cache for Teradata NAMED alias lookup. 159 * Maps SELECT statements to their alias index (alias name -> TResultColumn). 160 * Uses IdentityHashMap because we need object identity, not equals(). 161 * Optimization C: Reduces O(cols * select_items) to O(cols) for Teradata. 162 */ 163 private final Map<TSelectSqlStatement, Map<String, TResultColumn>> teradataNamedAliasCache; 164 165 /** 166 * All column references collected during Pass 1 (Principle 1: Scope完全复用). 167 * Used in Pass 2+ to re-resolve names without rebuilding the scope tree. 168 */ 169 private final List<TObjectName> allColumnReferences; 170 171 /** 172 * ScopeBuilder for visitor-based scope construction. 173 * Replaces manual scope building with proper nested scope handling. 174 */ 175 private final ScopeBuilder scopeBuilder; 176 177 /** 178 * Result from ScopeBuilder containing the complete scope tree. 179 * This is populated in Pass 1 and reused in Pass 2+. 
180 */ 181 private ScopeBuildResult scopeBuildResult; 182 183 /** 184 * NamespaceEnhancer for explicit column collection and enhancement. 185 * Handles the explicit namespace enhancement phase between resolution passes. 186 * Columns are collected during resolution and added to namespaces explicitly. 187 */ 188 private NamespaceEnhancer namespaceEnhancer; 189 190 /** 191 * Create resolver with default configuration 192 */ 193 public TSQLResolver2(TContext context, TStatementList statements) { 194 this(context, statements, TSQLResolverConfig.createDefault()); 195 } 196 197 /** 198 * Create resolver with custom configuration 199 */ 200 public TSQLResolver2(TContext context, TStatementList statements, TSQLResolverConfig config) { 201 this.globalContext = context; 202 this.sqlStatements = statements; 203 this.config = config; 204 this.resolutionContext = new ResolutionContext(); 205 this.nameResolver = new NameResolver(config, resolutionContext); 206 this.passHistory = new ArrayList<>(); 207 this.statementScopeCache = new java.util.HashMap<>(); 208 this.columnToScopeMap = new java.util.HashMap<>(); 209 this.fromScopeIndexCache = new IdentityHashMap<>(); 210 this.teradataNamedAliasCache = new IdentityHashMap<>(); 211 this.allColumnReferences = new ArrayList<>(); 212 213 // Initialize ScopeBuilder for visitor-based scope construction 214 this.scopeBuilder = new ScopeBuilder(context, config.getNameMatcher()); 215 // Pass guessColumnStrategy from config for namespace isolation (prevents test side effects) 216 if (config.hasCustomGuessColumnStrategy()) { 217 this.scopeBuilder.setGuessColumnStrategy(config.getGuessColumnStrategy()); 218 } 219 220 // If context is null, try to get TSQLEnv from statements 221 // This allows TSQLEnv to flow from parser.setSqlEnv() through statements 222 if (statements != null && statements.size() > 0) { 223 try { 224 TCustomSqlStatement firstStmt = statements.get(0); 225 if (firstStmt != null && firstStmt.getGlobalScope() != null && 226 
firstStmt.getGlobalScope().getSqlEnv() != null) { 227 this.scopeBuilder.setSqlEnv(firstStmt.getGlobalScope().getSqlEnv()); 228 } 229 } catch (Exception e) { 230 // Silently ignore - SQLEnv is optional enhancement 231 } 232 } 233 234 // Initialize convergence detector for iterative resolution 235 this.convergenceDetector = new ConvergenceDetector( 236 config.getMaxIterations(), 237 config.getStablePassesForConvergence(), 238 config.getMinProgressRate() 239 ); 240 241 // Initialize namespace enhancer for explicit column collection 242 // Debug mode follows the global resolver log setting 243 this.namespaceEnhancer = new NamespaceEnhancer(TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE); 244 } 245 246 /** 247 * Set the TSQLEnv to use for table metadata lookup. 248 * This allows external callers to provide TSQLEnv if automatic detection fails. 249 * 250 * @param sqlEnv the SQL environment containing table metadata 251 */ 252 public void setSqlEnv(gudusoft.gsqlparser.sqlenv.TSQLEnv sqlEnv) { 253 if (scopeBuilder != null) { 254 scopeBuilder.setSqlEnv(sqlEnv); 255 } 256 } 257 258 /** 259 * Get the TSQLEnv used for table metadata lookup. 260 * 261 * @return the SQL environment, or null if not set 262 */ 263 public gudusoft.gsqlparser.sqlenv.TSQLEnv getSqlEnv() { 264 return scopeBuilder != null ? scopeBuilder.getSqlEnv() : null; 265 } 266 267 /** 268 * Get the set of virtual trigger tables (deleted/inserted in SQL Server triggers). 269 * These tables should be excluded from table output since their columns are 270 * resolved to the trigger's target table. 271 * 272 * @return Set of TTable objects that are virtual trigger tables 273 */ 274 public java.util.Set<gudusoft.gsqlparser.nodes.TTable> getVirtualTriggerTables() { 275 return scopeBuilder != null ? scopeBuilder.getVirtualTriggerTables() : java.util.Collections.emptySet(); 276 } 277 278 /** 279 * Get the SQL statements being resolved. 
280 * 281 * @return the list of SQL statements 282 */ 283 public TStatementList getStatements() { 284 return sqlStatements; 285 } 286 287 // Performance timing fields (instance-level for single resolve() call) 288 private long timeScopeBuilder = 0; 289 private long timeNameResolution = 0; 290 private long timeEnhancement = 0; 291 private long timeLegacySync = 0; 292 private long timeOther = 0; 293 294 // Global accumulators for profiling across all resolve() calls 295 private static long globalTimeScopeBuilder = 0; 296 private static long globalTimeNameResolution = 0; 297 private static long globalTimeEnhancement = 0; 298 private static long globalTimeLegacySync = 0; 299 private static long globalTimeOther = 0; 300 private static int globalResolveCount = 0; 301 302 /** 303 * Reset global timing accumulators. 304 */ 305 public static void resetGlobalTimings() { 306 globalTimeScopeBuilder = 0; 307 globalTimeNameResolution = 0; 308 globalTimeEnhancement = 0; 309 globalTimeLegacySync = 0; 310 globalTimeOther = 0; 311 globalResolveCount = 0; 312 // Reset detailed legacy sync timings 313 globalTimeClearLinked = 0; 314 globalTimeFillAttributes = 0; 315 globalTimeSyncColumns = 0; 316 globalTimePopulateOrphans = 0; 317 globalTimeClearHints = 0; 318 } 319 320 /** 321 * Get global performance timing breakdown for profiling across all resolve() calls. 322 * @return formatted timing information 323 */ 324 public static String getGlobalPerformanceTimings() { 325 long total = globalTimeScopeBuilder + globalTimeNameResolution + globalTimeEnhancement + globalTimeLegacySync + globalTimeOther; 326 return String.format( 327 "TSQLResolver2 Global Timings (across %d resolve() calls):\n" + 328 " ScopeBuilder: %d ms (%.1f%%)\n" + 329 " NameResolution: %d ms (%.1f%%)\n" + 330 " Enhancement: %d ms (%.1f%%)\n" + 331 " LegacySync: %d ms (%.1f%%)\n" + 332 " Other: %d ms (%.1f%%)\n" + 333 " Total: %d ms", 334 globalResolveCount, 335 globalTimeScopeBuilder, total > 0 ? 
100.0 * globalTimeScopeBuilder / total : 0, 336 globalTimeNameResolution, total > 0 ? 100.0 * globalTimeNameResolution / total : 0, 337 globalTimeEnhancement, total > 0 ? 100.0 * globalTimeEnhancement / total : 0, 338 globalTimeLegacySync, total > 0 ? 100.0 * globalTimeLegacySync / total : 0, 339 globalTimeOther, total > 0 ? 100.0 * globalTimeOther / total : 0, 340 total); 341 } 342 343 /** 344 * Get performance timing breakdown for profiling. 345 * @return formatted timing information 346 */ 347 public String getPerformanceTimings() { 348 long total = timeScopeBuilder + timeNameResolution + timeEnhancement + timeLegacySync + timeOther; 349 return String.format( 350 "TSQLResolver2 Timings:\n" + 351 " ScopeBuilder: %d ms (%.1f%%)\n" + 352 " NameResolution: %d ms (%.1f%%)\n" + 353 " Enhancement: %d ms (%.1f%%)\n" + 354 " LegacySync: %d ms (%.1f%%)\n" + 355 " Other: %d ms (%.1f%%)\n" + 356 " Total: %d ms", 357 timeScopeBuilder, total > 0 ? 100.0 * timeScopeBuilder / total : 0, 358 timeNameResolution, total > 0 ? 100.0 * timeNameResolution / total : 0, 359 timeEnhancement, total > 0 ? 100.0 * timeEnhancement / total : 0, 360 timeLegacySync, total > 0 ? 100.0 * timeLegacySync / total : 0, 361 timeOther, total > 0 ? 
100.0 * timeOther / total : 0, 362 total); 363 } 364 365 /** 366 * Perform resolution on all SQL statements 367 */ 368 public boolean resolve() { 369 // Reset timing counters 370 timeScopeBuilder = 0; 371 timeNameResolution = 0; 372 timeEnhancement = 0; 373 timeLegacySync = 0; 374 timeOther = 0; 375 376 // Setup logging 377 TLog.clearLogs(); 378 if (!TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 379 TLog.disableLog(); 380 } else { 381 TLog.enableAllLevelLog(); 382 } 383 384 try { 385 logInfo("Starting TSQLResolver2.resolve()"); 386 387 long startTime = System.currentTimeMillis(); 388 389 // Delta 1: Collect metadata from DDL statements if no SQLEnv provided 390 if (getSqlEnv() == null) { 391 collectBatchMetadata(); 392 } 393 394 // Delta 4: Track database context from USE/SET statements 395 trackDatabaseContext(); 396 397 // Phase 1: Build global scope (once for all passes) 398 buildGlobalScope(); 399 400 timeOther += System.currentTimeMillis() - startTime; 401 402 // Phase 2: Perform iterative resolution 403 // (automatically completes after first pass if no second pass is needed) 404 return performIterativeResolution(); 405 406 } catch (Exception e) { 407 logError("Exception in TSQLResolver2.resolve(): " + e.getMessage()); 408 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 409 e.printStackTrace(); 410 } 411 return false; 412 } finally { 413 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 414 TBaseType.dumpLogs(false); 415 } 416 } 417 } 418 419 /** 420 * Perform iterative resolution. 421 * Automatically converges after first pass if no additional passes are needed. 422 * 423 * Architecture: 424 * - Pass 1: Build scope tree + initial name resolution 425 * - Pass 2-N: Reuse scope tree, collect evidence, infer columns, re-resolve names 426 * 427 * This separation allows: 428 * 1. Scopes to accumulate inferred columns across iterations 429 * 2. Later scopes to reference earlier scopes' inferred columns 430 * 3. 
Forward references to be resolved in subsequent passes 431 */ 432 private boolean performIterativeResolution() { 433 logInfo("Performing iterative resolution (max iterations: " + config.getMaxIterations() + ")"); 434 435 int passNumber = 1; 436 ResolutionStatistics previousStats = null; 437 boolean continueIterating = true; 438 boolean scopesBuilt = false; 439 440 while (continueIterating) { 441 logInfo("=== Pass " + passNumber + " ==="); 442 443 // Create a resolution pass 444 ResolutionPass pass = new ResolutionPass(passNumber, previousStats); 445 446 if (passNumber == 1) { 447 // ========== PASS 1: Build scope tree + initial resolution ========== 448 logInfo("Pass 1: Building scope tree using ScopeBuilder and performing initial resolution"); 449 450 // Clear all state for fresh start 451 resolutionContext.clear(); 452 columnToScopeMap.clear(); 453 fromScopeIndexCache.clear(); 454 dmlIndexCache.clear(); 455 teradataNamedAliasCache.clear(); 456 allColumnReferences.clear(); 457 458 // Use ScopeBuilder to build complete scope tree (handles all nesting correctly) 459 long scopeBuilderStart = System.currentTimeMillis(); 460 scopeBuildResult = scopeBuilder.build(sqlStatements); 461 462 // Get global scope from builder 463 globalScope = scopeBuildResult.getGlobalScope(); 464 465 // Copy column references and scope mappings from ScopeBuildResult 466 columnToScopeMap.putAll(scopeBuildResult.getColumnToScopeMap()); 467 allColumnReferences.addAll(scopeBuildResult.getAllColumnReferences()); 468 timeScopeBuilder += System.currentTimeMillis() - scopeBuilderStart; 469 470 logInfo("ScopeBuilder complete: " + scopeBuildResult.getStatistics()); 471 logInfo("Built " + scopeBuildResult.getStatementScopeMap().size() + " SelectScopes"); 472 473 // Initialize NamespaceEnhancer with scope tree (caches star namespaces) 474 namespaceEnhancer.initialize(scopeBuildResult); 475 namespaceEnhancer.startPass(passNumber); 476 477 // Get SET clause target columns that should not be re-resolved 
478 Set<TObjectName> setClauseTargetColumns = scopeBuilder.getSetClauseTargetColumns(); 479 480 // Get INSERT ALL target columns that should not be re-resolved 481 Set<TObjectName> insertAllTargetColumns = scopeBuilder.getInsertAllTargetColumns(); 482 483 // Get MERGE INSERT VALUES columns that need sourceTable restoration after resolution 484 Map<TObjectName, TTable> mergeInsertValuesColumns = scopeBuilder.getMergeInsertValuesColumns(); 485 486 // Perform initial name resolution for all collected columns 487 logInfo("Performing initial name resolution for " + allColumnReferences.size() + " column references"); 488 long nameResStart = System.currentTimeMillis(); 489 for (TObjectName objName : allColumnReferences) { 490 // Skip SET clause target columns - they already have sourceTable correctly set 491 // to the UPDATE target table and should NOT be resolved through star columns 492 if (setClauseTargetColumns.contains(objName)) { 493 continue; 494 } 495 496 // Skip INSERT ALL target columns - they already have sourceTable correctly set 497 // to the INSERT target table and should NOT be resolved against the subquery scope 498 if (insertAllTargetColumns.contains(objName)) { 499 continue; 500 } 501 502 IScope scope = columnToScopeMap.get(objName); 503 if (scope != null) { 504 nameResolver.resolve(objName, scope); 505 506 // Handle USING column priority for JOIN...USING syntax 507 handleUsingColumnResolution(objName); 508 509 // Handle Teradata NAMED alias resolution 510 handleTeradataNamedAliasResolution(objName); 511 handleQualifyClauseAliasResolution(objName); 512 513 // Handle subquery aliased/calculated column resolution 514 // Ensures aliased columns don't incorrectly trace to base tables 515 handleSubqueryAliasedColumnResolution(objName); 516 517 // Collect unresolved references for enhancement 518 collectForEnhancementIfNeeded(objName, scope); 519 } 520 } 521 522 // Restore sourceTable for MERGE INSERT VALUES columns after name resolution. 
523 // Name resolution may have set an AMBIGUOUS resolution (e.g., column 'product' 524 // appears in both target and source tables through the ON clause). In MERGE 525 // semantics, WHEN NOT MATCHED VALUES columns always reference the USING (source) 526 // table. 527 // 528 // For AMBIGUOUS resolution: clear it so getSourceTable() returns the actual field 529 // value (the USING table). AMBIGUOUS means the column was found in both target and 530 // source namespaces, but semantically it must reference the source. 531 // 532 // For EXACT_MATCH resolution: keep it because it contains star column push-down 533 // tracing info (e.g., when USING is a subquery with SELECT *, the resolution 534 // traces the VALUES column to the physical table inside the subquery). 535 for (Map.Entry<TObjectName, TTable> entry : mergeInsertValuesColumns.entrySet()) { 536 TObjectName col = entry.getKey(); 537 TTable usingTable = entry.getValue(); 538 ResolutionResult res = col.getResolution(); 539 if (res != null && res.isAmbiguous()) { 540 col.setResolution(null); 541 } 542 col.setSourceTable(usingTable); 543 } 544 545 timeNameResolution += System.currentTimeMillis() - nameResStart; 546 547 // Explicit Enhancement Phase: Add collected columns to namespaces 548 long enhanceStart = System.currentTimeMillis(); 549 EnhancementResult enhanceResult = namespaceEnhancer.enhance(); 550 timeEnhancement += System.currentTimeMillis() - enhanceStart; 551 logInfo("Pass 1 enhancement: " + enhanceResult.getTotalAdded() + " columns added to namespaces"); 552 553 scopesBuilt = true; 554 logInfo("Pass 1 complete. 
Resolved " + allColumnReferences.size() + " column references."); 555 556 557 } else { 558 // ========== PASS 2+: Explicit Enhancement + Re-resolve ========== 559 logInfo("Pass " + passNumber + ": Explicit namespace enhancement and re-resolution"); 560 561 // ======== Phase A: Start New Pass ======== 562 namespaceEnhancer.startPass(passNumber); 563 564 // ======== Phase B: Clear Resolution Results (keep scopes!) ======== 565 logInfo("Phase B: Clearing resolution results (scopes preserved)"); 566 resolutionContext.clear(); 567 568 // ======== Phase C: Re-resolve with Enhanced Namespaces ======== 569 logInfo("Phase C: Re-resolving with enhanced namespaces"); 570 571 // Get SET clause target columns that should not be re-resolved 572 Set<TObjectName> setClauseTargetColumns = scopeBuilder.getSetClauseTargetColumns(); 573 574 // Get INSERT ALL target columns that should not be re-resolved 575 Set<TObjectName> insertAllTargetColumns = scopeBuilder.getInsertAllTargetColumns(); 576 577 // Get MERGE INSERT VALUES columns that need sourceTable restoration after resolution 578 Map<TObjectName, TTable> mergeInsertValuesColumns = scopeBuilder.getMergeInsertValuesColumns(); 579 580 // Re-resolve all column references using their original scopes 581 // Scopes are reused from Pass 1, but namespaces may have been enhanced 582 for (TObjectName objName : allColumnReferences) { 583 // Skip SET clause target columns - they already have sourceTable correctly set 584 // to the UPDATE target table and should NOT be resolved through star columns 585 if (setClauseTargetColumns.contains(objName)) { 586 continue; 587 } 588 589 // Skip INSERT ALL target columns - they already have sourceTable correctly set 590 // to the INSERT target table and should NOT be resolved against the subquery scope 591 if (insertAllTargetColumns.contains(objName)) { 592 continue; 593 } 594 595 IScope scope = columnToScopeMap.get(objName); 596 if (scope != null) { 597 nameResolver.resolve(objName, scope); 598 599 // 
Handle USING column priority for JOIN...USING syntax 600 handleUsingColumnResolution(objName); 601 602 // Handle Teradata NAMED alias resolution 603 handleTeradataNamedAliasResolution(objName); 604 handleQualifyClauseAliasResolution(objName); 605 606 // Handle subquery aliased/calculated column resolution 607 // Ensures aliased columns don't incorrectly trace to base tables 608 handleSubqueryAliasedColumnResolution(objName); 609 610 // Collect for next enhancement pass if still targets star namespace 611 collectForEnhancementIfNeeded(objName, scope); 612 } 613 } 614 615 // Restore sourceTable for MERGE INSERT VALUES columns after re-resolution 616 for (Map.Entry<TObjectName, TTable> entry : mergeInsertValuesColumns.entrySet()) { 617 ResolutionResult res = entry.getKey().getResolution(); 618 if (res != null && res.isAmbiguous()) { 619 entry.getKey().setResolution(null); 620 } 621 entry.getKey().setSourceTable(entry.getValue()); 622 } 623 624 // ======== Phase D: Explicit Namespace Enhancement ======== 625 logInfo("Phase D: Explicit namespace enhancement"); 626 EnhancementResult enhanceResult = namespaceEnhancer.enhance(); 627 logInfo("Pass " + passNumber + " enhancement: " + 628 enhanceResult.getTotalAdded() + " columns added, " + 629 enhanceResult.getTotalSkipped() + " skipped (existing)"); 630 631 // Legacy support: also run old evidence collection (if needed) 632 if (config.isEvidenceCollectionEnabled()) { 633 runLegacyEvidenceCollection(); 634 } 635 } 636 637 // Get statistics after this pass 638 ResolutionStatistics currentStats = getStatistics(); 639 pass.complete(currentStats); 640 641 // Record this pass 642 convergenceDetector.recordPass(pass); 643 passHistory.add(pass); 644 645 logInfo(pass.getSummary()); 646 647 // Check convergence 648 ConvergenceDetector.ConvergenceResult convergence = convergenceDetector.checkConvergence(); 649 if (convergence.hasConverged()) { 650 logInfo("Convergence detected: " + convergence.getReason()); 651 
pass.setStopReason(convergence.getReason()); 652 continueIterating = false; 653 } else { 654 // Prepare for next pass 655 previousStats = currentStats; 656 passNumber++; 657 } 658 } 659 660 // Create cloned columns for star column tracing 661 // This is a CORE part of TSQLResolver2 - when a column traces through a CTE/subquery 662 // with SELECT * to a physical table, we create a cloned TObjectName with sourceTable 663 // pointing to the traced physical table. This ensures complete lineage tracking. 664 createTracedColumnClones(); 665 666 // Sync to legacy structures if enabled 667 if (config.isLegacyCompatibilityEnabled()) { 668 long syncStart = System.currentTimeMillis(); 669 syncToLegacyStructures(); 670 timeLegacySync += System.currentTimeMillis() - syncStart; 671 } 672 673 // Print final statistics 674 logInfo("Iterative resolution complete after " + passHistory.size() + " passes"); 675 ResolutionStatistics finalStats = getStatistics(); 676 logInfo("Final statistics: " + finalStats); 677 678 // Print namespace enhancement summary if in debug mode 679 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 680 logInfo("=== Namespace Enhancement Summary ==="); 681 logInfo("Total columns added: " + namespaceEnhancer.getTotalColumnsAdded()); 682 } 683 684 // Print performance timing breakdown 685 logInfo(getPerformanceTimings()); 686 687 // Accumulate to global timings for profiling 688 globalTimeScopeBuilder += timeScopeBuilder; 689 globalTimeNameResolution += timeNameResolution; 690 globalTimeEnhancement += timeEnhancement; 691 globalTimeLegacySync += timeLegacySync; 692 globalTimeOther += timeOther; 693 globalResolveCount++; 694 695 return true; 696 } 697 698 /** 699 * Run legacy evidence collection (deprecated). 700 * Kept for backward compatibility. 
701 */ 702 @SuppressWarnings("deprecation") 703 private void runLegacyEvidenceCollection() { 704 logInfo("Running legacy evidence collection (deprecated)"); 705 706 gudusoft.gsqlparser.resolver2.inference.EvidenceCollector evidenceCollector = 707 new gudusoft.gsqlparser.resolver2.inference.EvidenceCollector(); 708 709 int evidenceCount = 0; 710 for (int i = 0; i < sqlStatements.size(); i++) { 711 Object stmt = sqlStatements.get(i); 712 if (stmt instanceof TSelectSqlStatement) { 713 List<gudusoft.gsqlparser.resolver2.inference.InferenceEvidence> stmtEvidence = 714 evidenceCollector.collectFromSelect((TSelectSqlStatement) stmt); 715 evidenceCount += stmtEvidence.size(); 716 } 717 } 718 719 logInfo("Legacy evidence collection: " + evidenceCount + " items"); 720 } 721 722 /** 723 * Get the namespace enhancer for external access to enhancement history. 724 * 725 * @return the namespace enhancer 726 */ 727 public NamespaceEnhancer getNamespaceEnhancer() { 728 return namespaceEnhancer; 729 } 730 731 /** 732 * Get a detailed enhancement report. 733 * 734 * @return detailed report string 735 */ 736 public String getEnhancementReport() { 737 return namespaceEnhancer.generateReport(); 738 } 739 740 /** 741 * Re-process a statement for name resolution only (without rebuilding scopes). 742 * This is used in Pass 2+ to re-resolve names using enhanced scopes. 
743 * 744 * CRITICAL (Principle 1: Scope完全复用): 745 * - Scope tree is built ONCE in Pass 1 and completely reused in Pass 2+ 746 * - This method MUST NOT call processStatement() which rebuilds scopes 747 * - Instead, it iterates through allColumnReferences and re-resolves each 748 * column using its original scope from columnToScopeMap 749 * 750 * This allows: 751 * - Namespaces to be enhanced across iterations (Principle 2) 752 * - Star columns to benefit from reverse inference (Principle 3) 753 * - All previous inference results to be preserved 754 */ 755 private void reprocessStatementNamesOnly(Object statement) { 756 logDebug("Re-resolving column references without rebuilding scopes"); 757 758 // Re-resolve all column references using their original scopes 759 // The scopes are reused from Pass 1, but their namespaces may have been enhanced 760 for (TObjectName objName : allColumnReferences) { 761 IScope scope = columnToScopeMap.get(objName); 762 if (scope != null) { 763 // Re-resolve this column using the (potentially enhanced) scope 764 nameResolver.resolve(objName, scope); 765 766 // Handle USING column priority for JOIN...USING syntax 767 handleUsingColumnResolution(objName); 768 769 // Handle Teradata NAMED alias resolution 770 handleTeradataNamedAliasResolution(objName); 771 handleQualifyClauseAliasResolution(objName); 772 773 // Collect for next enhancement pass if still unresolved 774 collectForEnhancementIfNeeded(objName, scope); 775 } else { 776 logError("No scope found for column: " + objName); 777 } 778 } 779 } 780 781 /** 782 * Handle special resolution for USING columns in JOIN...USING syntax. 783 * In "a JOIN table2 USING (id)", the USING column exists in BOTH tables. 
784 * - The synthetic column (clone) resolves to the right-side table (table2) 785 * - The original USING column resolves to the left-side table (a) 786 * 787 * @param objName The column reference 788 */ 789 private void handleUsingColumnResolution(TObjectName objName) { 790 if (objName == null || scopeBuildResult == null) return; 791 792 // Check if this is a synthetic USING column (should resolve to right table) 793 TTable rightTable = scopeBuildResult.getUsingColumnRightTable(objName); 794 if (rightTable != null) { 795 // This is the synthetic USING column - set its sourceTable to the right-side table 796 objName.setSourceTable(rightTable); 797 798 // Create a proper resolution with the right-side table 799 gudusoft.gsqlparser.resolver2.model.ColumnSource source = 800 new gudusoft.gsqlparser.resolver2.model.ColumnSource( 801 null, // no namespace for USING columns 802 objName.getColumnNameOnly(), 803 null, // no definition node 804 1.0, // high confidence 805 "using_column_right", 806 rightTable // override table - the right-side table of the JOIN 807 ); 808 gudusoft.gsqlparser.resolver2.model.ResolutionResult result = 809 gudusoft.gsqlparser.resolver2.model.ResolutionResult.exactMatch(source); 810 811 // Update the TObjectName's resolution so formatter uses correct finalTable 812 objName.setResolution(result); 813 814 // Also register in ResolutionContext so getReferencesTo(table) can find it 815 resolutionContext.registerResolution(objName, result); 816 817 logDebug("USING column " + objName.getColumnNameOnly() + 818 " -> right-side table " + rightTable.getName()); 819 return; 820 } 821 822 // Check if this is the original USING column (should resolve to left table) 823 TTable leftTable = scopeBuildResult.getUsingColumnLeftTable(objName); 824 if (leftTable != null) { 825 // This is the original USING column - set its sourceTable to the left-side table 826 objName.setSourceTable(leftTable); 827 828 // Create a proper resolution with the left-side table 829 
gudusoft.gsqlparser.resolver2.model.ColumnSource source = 830 new gudusoft.gsqlparser.resolver2.model.ColumnSource( 831 null, // no namespace for USING columns 832 objName.getColumnNameOnly(), 833 null, // no definition node 834 1.0, // high confidence 835 "using_column_left", 836 leftTable // override table - the left-side table of the JOIN 837 ); 838 gudusoft.gsqlparser.resolver2.model.ResolutionResult result = 839 gudusoft.gsqlparser.resolver2.model.ResolutionResult.exactMatch(source); 840 841 // Update the TObjectName's resolution so formatter uses correct finalTable 842 objName.setResolution(result); 843 844 // Also register in ResolutionContext so getReferencesTo(table) can find it 845 resolutionContext.registerResolution(objName, result); 846 847 logDebug("USING column " + objName.getColumnNameOnly() + 848 " -> left-side table " + leftTable.getName()); 849 } 850 } 851 852 /** 853 * Handle Teradata NAMED alias resolution. 854 * 855 * <p>In Teradata, NAMED aliases defined in the SELECT list (using the {@code (NAMED alias)} syntax) 856 * can be referenced in the WHERE and QUALIFY clauses of the same SELECT statement. This is different 857 * from standard SQL where column aliases are only visible in ORDER BY.</p> 858 * 859 * <p>This method checks if a resolved column matches a NAMED alias from the enclosing SELECT list. 
860 * If it does, the resolution is updated to indicate this is a calculated column (alias), not a 861 * physical column from the table.</p> 862 * 863 * <p>Example:</p> 864 * <pre> 865 * SELECT USI_ID, SUBS_ID, 866 * (CAST(:param AS TIMESTAMP(0)))(NAMED REPORT_DTTM) 867 * FROM PRD2_ODW.SUBS_USI_HISTORY 868 * WHERE stime <= REPORT_DTTM AND etime > REPORT_DTTM 869 * </pre> 870 * <p>Here, REPORT_DTTM references in WHERE should NOT be linked to PRD2_ODW.SUBS_USI_HISTORY 871 * because REPORT_DTTM is a NAMED alias, not a physical column.</p> 872 * 873 * @param objName The column reference to check 874 */ 875 private void handleTeradataNamedAliasResolution(TObjectName objName) { 876 if (objName == null || sqlStatements == null || sqlStatements.size() == 0) return; 877 878 // Only applies to Teradata 879 EDbVendor dbVendor = sqlStatements.get(0).dbvendor; 880 if (dbVendor != EDbVendor.dbvteradata) return; 881 882 String columnName = objName.getColumnNameOnly(); 883 if (columnName == null || columnName.isEmpty()) return; 884 885 // Only apply to UNQUALIFIED column references (no table prefix) 886 // If a column has a table qualifier like "CP.CALC_PLATFORM_ID", it's clearly 887 // referencing a specific table's column, not a NAMED alias 888 if (objName.getTableToken() != null) return; 889 890 // Get the scope for this column reference 891 IScope scope = columnToScopeMap.get(objName); 892 if (scope == null) return; 893 894 // Find the enclosing SELECT statement from the scope 895 TSelectSqlStatement enclosingSelect = findEnclosingSelectFromScope(scope); 896 if (enclosingSelect == null) return; 897 898 // Optimization C: Use cached index for O(1) lookup instead of O(N) iteration 899 Map<String, TResultColumn> aliasIndex = getTeradataNamedAliasIndex(enclosingSelect); 900 if (aliasIndex == null || aliasIndex.isEmpty()) return; 901 902 // Look up the result column by alias name (case-insensitive, stored as lowercase) 903 TResultColumn resultCol = 
aliasIndex.get(columnName.toLowerCase()); 904 if (resultCol == null) return; 905 906 // Skip if objName is part of this result column's expression 907 // This handles cases like "CAST(ID AS DECIMAL) AS ID" where the ID inside 908 // CAST is the source column, not a reference to the ID alias 909 if (isColumnWithinResultColumn(objName, resultCol)) { 910 return; 911 } 912 913 // Found a matching NAMED alias 914 // Clear the source table since this is an alias, not a physical column 915 objName.setSourceTable(null); 916 917 // Create a new ColumnSource with the TResultColumn as the definition node 918 // This will make isCalculatedColumn() return true 919 ColumnSource source = new ColumnSource( 920 null, // namespace - not from a table 921 columnName, 922 resultCol, // definition node - the TResultColumn with the alias 923 1.0, // high confidence 924 "teradata_named_alias" 925 ); 926 ResolutionResult result = ResolutionResult.exactMatch(source); 927 objName.setResolution(result); 928 resolutionContext.registerResolution(objName, result); 929 930 logDebug("Teradata NAMED alias: " + columnName + " -> alias from SELECT list"); 931 } 932 933 /** 934 * Handle QUALIFY clause alias resolution for Snowflake, BigQuery, and Databricks. 935 * 936 * <p>In Snowflake, BigQuery, and Databricks, column aliases defined in the SELECT list 937 * can be referenced in the QUALIFY clause. This is different from standard SQL where 938 * column aliases are only visible in ORDER BY.</p> 939 * 940 * <p>This method checks if a column reference in the QUALIFY clause matches an alias 941 * from the enclosing SELECT list. 
If it does, the resolution is updated to indicate 942 * this is a calculated column (alias), not a physical column from the table.</p> 943 * 944 * <p>Example:</p> 945 * <pre> 946 * SELECT RoomNumber, RoomType, BlockFloor, 947 * ROW_NUMBER() OVER (PARTITION BY RoomType ORDER BY BlockFloor) AS row_num 948 * FROM Hospital.Room 949 * QUALIFY row_num = 1 950 * </pre> 951 * <p>Here, row_num in QUALIFY should NOT be linked to Hospital.Room because 952 * row_num is an alias for the window function, not a physical column.</p> 953 * 954 * @param objName The column reference to check 955 */ 956 private void handleQualifyClauseAliasResolution(TObjectName objName) { 957 if (objName == null || sqlStatements == null || sqlStatements.size() == 0) return; 958 959 // Only applies to databases that support QUALIFY with alias visibility 960 EDbVendor dbVendor = sqlStatements.get(0).dbvendor; 961 if (dbVendor != EDbVendor.dbvsnowflake && 962 dbVendor != EDbVendor.dbvbigquery && 963 dbVendor != EDbVendor.dbvdatabricks) return; 964 965 String columnName = objName.getColumnNameOnly(); 966 if (columnName == null || columnName.isEmpty()) return; 967 968 // Only apply to UNQUALIFIED column references (no table prefix) 969 if (objName.getTableToken() != null) return; 970 971 // Check if this column is within a QUALIFY clause 972 if (!isInQualifyClause(objName)) return; 973 974 // Get the scope for this column reference 975 IScope scope = columnToScopeMap.get(objName); 976 if (scope == null) return; 977 978 // Find the enclosing SELECT statement from the scope 979 TSelectSqlStatement enclosingSelect = findEnclosingSelectFromScope(scope); 980 if (enclosingSelect == null) return; 981 982 // Look for a matching alias in the SELECT list 983 TResultColumnList resultColumns = enclosingSelect.getResultColumnList(); 984 if (resultColumns == null || resultColumns.size() == 0) return; 985 986 TResultColumn matchingResultCol = null; 987 for (int i = 0; i < resultColumns.size(); i++) { 988 TResultColumn 
resultCol = resultColumns.getResultColumn(i); 989 if (resultCol == null) continue; 990 991 // Check if this result column has an alias matching the column name 992 if (resultCol.getAliasClause() != null && 993 resultCol.getAliasClause().getAliasName() != null) { 994 String aliasName = resultCol.getAliasClause().getAliasName().toString(); 995 if (aliasName != null && aliasName.equalsIgnoreCase(columnName)) { 996 matchingResultCol = resultCol; 997 break; 998 } 999 } 1000 } 1001 1002 if (matchingResultCol == null) return; 1003 1004 // Found a matching alias - clear the source table since this is an alias, not a physical column 1005 objName.setSourceTable(null); 1006 1007 // Create a new ColumnSource with the TResultColumn as the definition node 1008 // This will make isCalculatedColumn() return true 1009 ColumnSource source = new ColumnSource( 1010 null, // namespace - not from a table 1011 columnName, 1012 matchingResultCol, // definition node - the TResultColumn with the alias 1013 1.0, // high confidence 1014 "qualify_clause_alias" 1015 ); 1016 ResolutionResult result = ResolutionResult.exactMatch(source); 1017 objName.setResolution(result); 1018 resolutionContext.registerResolution(objName, result); 1019 1020 logDebug("QUALIFY clause alias: " + columnName + " -> alias from SELECT list"); 1021 } 1022 1023 /** 1024 * Check if a column reference is within a QUALIFY clause. 
1025 * 1026 * @param objName The column reference to check 1027 * @return true if the column is within a QUALIFY clause 1028 */ 1029 private boolean isInQualifyClause(TObjectName objName) { 1030 if (objName == null) return false; 1031 1032 // Get the column's scope to find the enclosing SELECT statement 1033 IScope scope = columnToScopeMap.get(objName); 1034 if (scope == null) return false; 1035 1036 TSelectSqlStatement enclosingSelect = findEnclosingSelectFromScope(scope); 1037 if (enclosingSelect == null) return false; 1038 1039 // Check if this SELECT has a QUALIFY clause 1040 TQualifyClause qualifyClause = enclosingSelect.getQualifyClause(); 1041 if (qualifyClause == null) return false; 1042 1043 // Check if the column's token position is within the QUALIFY clause's range 1044 if (objName.getStartToken() != null && qualifyClause.getStartToken() != null && 1045 qualifyClause.getEndToken() != null) { 1046 long objPos = objName.getStartToken().posinlist; 1047 long qualifyStart = qualifyClause.getStartToken().posinlist; 1048 long qualifyEnd = qualifyClause.getEndToken().posinlist; 1049 1050 return objPos >= qualifyStart && objPos <= qualifyEnd; 1051 } 1052 1053 return false; 1054 } 1055 1056 /** 1057 * Gets or builds the Teradata NAMED alias index for a SELECT statement. 1058 * Optimization C: Caches the alias map for O(1) lookup instead of O(N) iteration. 
1059 * 1060 * @param selectStmt The SELECT statement to get/build the index for 1061 * @return Map from lowercase alias name to TResultColumn, or null if no aliases 1062 */ 1063 private Map<String, TResultColumn> getTeradataNamedAliasIndex(TSelectSqlStatement selectStmt) { 1064 if (selectStmt == null) return null; 1065 1066 // Check cache first 1067 Map<String, TResultColumn> index = teradataNamedAliasCache.get(selectStmt); 1068 if (index != null) { 1069 return index; 1070 } 1071 1072 // Build index for this SELECT statement 1073 TResultColumnList resultColumns = selectStmt.getResultColumnList(); 1074 if (resultColumns == null || resultColumns.size() == 0) { 1075 // Cache empty map to avoid rebuilding 1076 index = java.util.Collections.emptyMap(); 1077 teradataNamedAliasCache.put(selectStmt, index); 1078 return index; 1079 } 1080 1081 index = new java.util.HashMap<>(); 1082 for (int i = 0; i < resultColumns.size(); i++) { 1083 TResultColumn resultCol = resultColumns.getResultColumn(i); 1084 if (resultCol == null) continue; 1085 1086 // Check if this result column has a NAMED alias 1087 if (resultCol.getAliasClause() != null && 1088 resultCol.getAliasClause().getAliasName() != null) { 1089 String aliasName = resultCol.getAliasClause().getAliasName().toString(); 1090 if (aliasName != null && !aliasName.isEmpty()) { 1091 // Store with lowercase key for case-insensitive matching 1092 index.put(aliasName.toLowerCase(), resultCol); 1093 } 1094 } 1095 } 1096 1097 // Cache the index (even if empty, to avoid rebuilding) 1098 teradataNamedAliasCache.put(selectStmt, index); 1099 1100 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && !index.isEmpty()) { 1101 logDebug("Built Teradata NAMED alias index for SELECT with " + index.size() + " aliases"); 1102 } 1103 1104 return index; 1105 } 1106 1107 /** 1108 * Check if a column reference (TObjectName) is within a result column's expression. 
1109 * This is used to prevent treating source columns in expressions like "CAST(ID AS DECIMAL) AS ID" 1110 * as references to the alias. 1111 * 1112 * @param objName The column reference to check 1113 * @param resultCol The result column to check against 1114 * @return true if objName is within resultCol's expression tree 1115 */ 1116 private boolean isColumnWithinResultColumn(TObjectName objName, TResultColumn resultCol) { 1117 if (objName == null || resultCol == null) return false; 1118 1119 // Get the expression of the result column 1120 TExpression expr = resultCol.getExpr(); 1121 if (expr == null) return false; 1122 1123 // Check by comparing start/end positions 1124 // If objName's position is within resultCol's expression, it's part of it 1125 long objStart = objName.getStartToken() != null ? objName.getStartToken().posinlist : -1; 1126 long objEnd = objName.getEndToken() != null ? objName.getEndToken().posinlist : -1; 1127 long exprStart = expr.getStartToken() != null ? expr.getStartToken().posinlist : -1; 1128 long exprEnd = expr.getEndToken() != null ? expr.getEndToken().posinlist : -1; 1129 1130 if (objStart >= 0 && exprStart >= 0 && objEnd >= 0 && exprEnd >= 0) { 1131 return objStart >= exprStart && objEnd <= exprEnd; 1132 } 1133 1134 return false; 1135 } 1136 1137 /** 1138 * Handle subquery aliased/calculated column resolution. 1139 * 1140 * <p>When a column reference resolves through a subquery (or CTE containing subqueries), 1141 * and the underlying column is an alias or calculated expression, we should NOT trace 1142 * it to the base table. This method ensures that such columns have their sourceTable 1143 * cleared to prevent incorrect attribution.</p> 1144 * 1145 * <p>This is essential for queries like:</p> 1146 * <pre> 1147 * WITH DataCTE AS ( 1148 * SELECT t.col, COUNT(*) AS cnt FROM table1 t ... 
1149 * ) 1150 * SELECT * FROM DataCTE 1151 * </pre> 1152 * <p>The 'cnt' column should NOT be traced to 'table1' because it's a calculated column.</p> 1153 * 1154 * @param objName The column reference to check 1155 */ 1156 private void handleSubqueryAliasedColumnResolution(TObjectName objName) { 1157 if (objName == null) return; 1158 1159 // Check if column has a table qualifier pointing to a subquery/CTE 1160 // If so, we should KEEP the sourceTable link for lineage tracing 1161 // The qualifier explicitly tells us which subquery the column belongs to 1162 String tableQualifier = objName.getTableString(); 1163 if (tableQualifier != null && !tableQualifier.isEmpty()) { 1164 IScope scope = columnToScopeMap.get(objName); 1165 if (scope != null) { 1166 TTable qualifiedTable = findTableByQualifier(scope, tableQualifier); 1167 if (qualifiedTable != null && 1168 (qualifiedTable.getSubquery() != null || qualifiedTable.getCTE() != null)) { 1169 // Column has qualifier pointing to a subquery/CTE 1170 // Keep the sourceTable link for lineage tracing (e.g., a.num_emp -> subquery a) 1171 // Don't clear sourceTable - this link is correct and needed 1172 logDebug("Subquery/CTE qualified column: " + objName.toString() + 1173 " - keeping sourceTable link to " + tableQualifier); 1174 return; 1175 } 1176 } 1177 } 1178 1179 // For unqualified columns (or columns qualified with base tables), 1180 // check if this is a calculated column or alias that should not trace to base tables 1181 ColumnSource source = objName.getColumnSource(); 1182 if (source != null) { 1183 if (source.isCalculatedColumn() || source.isColumnAlias()) { 1184 TTable currentSource = objName.getSourceTable(); 1185 if (currentSource != null) { 1186 // Only clear if sourceTable is a base table (not subquery/CTE) 1187 // For subquery/CTE references, keep the link for lineage tracing 1188 if (currentSource.getSubquery() == null && currentSource.getCTE() == null) { 1189 objName.setSourceTable(null); 1190 
logDebug("Calculated/alias column: " + objName.getColumnNameOnly() + 1191 " cleared sourceTable (was " + currentSource.getName() + ") - not linked to base table"); 1192 } 1193 } 1194 } 1195 } 1196 } 1197 1198 /** 1199 * Gets or builds the FromScopeIndex for a scope (Performance Optimization B). 1200 * 1201 * <p>This method implements lazy initialization: the index is built on first access 1202 * and cached for subsequent lookups within the same resolution pass.</p> 1203 * 1204 * @param scope The scope to get the index for (SelectScope, UpdateScope, or FromScope) 1205 * @return The cached or newly built FromScopeIndex, or null if scope has no FROM clause 1206 */ 1207 private FromScopeIndex getFromScopeIndex(IScope scope) { 1208 if (scope == null) { 1209 return null; 1210 } 1211 1212 // Get the actual FromScope to use as cache key 1213 IScope fromScope = null; 1214 if (scope instanceof SelectScope) { 1215 fromScope = ((SelectScope) scope).getFromScope(); 1216 } else if (scope instanceof gudusoft.gsqlparser.resolver2.scope.UpdateScope) { 1217 fromScope = ((gudusoft.gsqlparser.resolver2.scope.UpdateScope) scope).getFromScope(); 1218 } else if (scope instanceof FromScope) { 1219 fromScope = scope; 1220 } 1221 1222 if (fromScope == null) { 1223 return null; 1224 } 1225 1226 // Check cache first (lazy initialization) 1227 FromScopeIndex index = fromScopeIndexCache.get(fromScope); 1228 if (index == null) { 1229 // Build index and cache it 1230 index = new FromScopeIndex(fromScope.getChildren()); 1231 fromScopeIndexCache.put(fromScope, index); 1232 1233 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 1234 logDebug("Built FromScopeIndex for scope: " + index); 1235 } 1236 } 1237 1238 return index; 1239 } 1240 1241 /** 1242 * Find a table by its qualifier (alias or name) in the scope. 1243 * Uses FromScopeIndex for O(1) lookup instead of O(N) linear scan. 
1244 */ 1245 private TTable findTableByQualifier(IScope scope, String qualifier) { 1246 if (scope == null || qualifier == null) return null; 1247 1248 // Use indexed lookup (Performance Optimization B) 1249 FromScopeIndex index = getFromScopeIndex(scope); 1250 if (index != null) { 1251 return index.findTableByQualifier(qualifier); 1252 } 1253 1254 return null; 1255 } 1256 1257 /** 1258 * Check if a column name is an alias (not a passthrough column) in the subquery. 1259 */ 1260 private boolean isColumnAnAliasInSubquery(TSelectSqlStatement subquery, String columnName) { 1261 if (subquery == null || columnName == null) return false; 1262 1263 TResultColumnList resultCols = subquery.getResultColumnList(); 1264 if (resultCols == null) return false; 1265 1266 for (int i = 0; i < resultCols.size(); i++) { 1267 TResultColumn rc = resultCols.getResultColumn(i); 1268 if (rc == null) continue; 1269 1270 // Check if this result column has an alias matching the column name 1271 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1272 String alias = rc.getAliasClause().getAliasName().toString(); 1273 if (alias != null && alias.equalsIgnoreCase(columnName)) { 1274 // Found matching alias - check if it's a calculated column 1275 TExpression expr = rc.getExpr(); 1276 if (expr != null) { 1277 // Not a simple column reference = calculated 1278 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1279 return true; 1280 } 1281 } 1282 } 1283 } 1284 1285 // Also check for SQL Server proprietary alias syntax: alias = expr 1286 // In this case, the alias is the column name itself 1287 String colName = getResultColumnName(rc); 1288 if (colName != null && colName.equalsIgnoreCase(columnName)) { 1289 TExpression expr = rc.getExpr(); 1290 if (expr != null && expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1291 return true; 1292 } 1293 } 1294 } 1295 return false; 1296 } 1297 1298 /** 1299 * Get the column name from a result 
column (handles aliases and SQL Server proprietary syntax). 1300 */ 1301 private String getResultColumnName(TResultColumn rc) { 1302 if (rc == null) return null; 1303 1304 // Check for explicit alias 1305 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1306 return rc.getAliasClause().getAliasName().toString(); 1307 } 1308 1309 // Check for SQL Server proprietary alias: alias = expr 1310 // In this case, the expression itself contains the alias 1311 TExpression expr = rc.getExpr(); 1312 if (expr != null && expr.getExpressionType() == EExpressionType.assignment_t) { 1313 // The left side is the alias 1314 if (expr.getLeftOperand() != null && expr.getLeftOperand().getObjectOperand() != null) { 1315 return expr.getLeftOperand().getObjectOperand().toString(); 1316 } 1317 } 1318 1319 return null; 1320 } 1321 1322 /** 1323 * Find the enclosing SELECT statement from a scope. 1324 * Traverses up the scope hierarchy to find a SelectScope and gets its node. 1325 * 1326 * @param scope The scope to start from 1327 * @return The enclosing SELECT statement, or null if not found 1328 */ 1329 private TSelectSqlStatement findEnclosingSelectFromScope(IScope scope) { 1330 if (scope == null) return null; 1331 1332 IScope currentScope = scope; 1333 int maxIterations = 100; // Prevent infinite loops 1334 int iterations = 0; 1335 1336 while (currentScope != null && iterations < maxIterations) { 1337 iterations++; 1338 1339 // Check if current scope is a SelectScope 1340 if (currentScope instanceof SelectScope) { 1341 TParseTreeNode node = currentScope.getNode(); 1342 if (node instanceof TSelectSqlStatement) { 1343 return (TSelectSqlStatement) node; 1344 } 1345 } 1346 1347 // Move up to parent scope 1348 currentScope = currentScope.getParent(); 1349 } 1350 return null; 1351 } 1352 1353 /** 1354 * Collect a column reference for namespace enhancement if it targets a star namespace. 
1355 * This is called during resolution to gather columns that need to be added to namespaces. 1356 * 1357 * @param objName The column reference 1358 * @param scope The scope where the column should be resolved 1359 */ 1360 private void collectForEnhancementIfNeeded(TObjectName objName, IScope scope) { 1361 if (objName == null || scope == null) return; 1362 1363 String columnName = objName.getColumnNameOnly(); 1364 if (columnName == null || columnName.isEmpty()) return; 1365 1366 // Get the resolution result to check status 1367 gudusoft.gsqlparser.resolver2.model.ResolutionResult result = objName.getResolution(); 1368 1369 // Find candidate namespace from scope's FROM clause 1370 INamespace candidateNamespace = findCandidateNamespace(objName, scope); 1371 1372 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 1373 logInfo("[TSQLResolver2] collectForEnhancement: column=" + columnName + 1374 ", candidateNs=" + (candidateNamespace != null ? candidateNamespace.getDisplayName() : "null") + 1375 ", hasStar=" + (candidateNamespace != null ? candidateNamespace.hasStarColumn() : "N/A")); 1376 } 1377 1378 if (candidateNamespace != null) { 1379 // Determine confidence based on context 1380 double confidence = 0.7; // Default for unqualified reference 1381 String evidence = "outer_reference"; 1382 1383 // Higher confidence for qualified references (e.g., "a.column") 1384 if (objName.getTableToken() != null) { 1385 confidence = 0.9; 1386 evidence = "qualified_reference"; 1387 } 1388 1389 // Collect for enhancement 1390 namespaceEnhancer.collectColumnRef( 1391 columnName, 1392 candidateNamespace, 1393 objName, 1394 confidence, 1395 evidence 1396 ); 1397 } 1398 } 1399 1400 /** 1401 * Find the candidate namespace for a column reference. 1402 * Looks at the scope's FROM clause to find namespaces with star columns. 1403 * Uses FromScopeIndex for O(1) lookup instead of O(N) linear scan. 
1404 */ 1405 private INamespace findCandidateNamespace(TObjectName objName, IScope scope) { 1406 // Use indexed lookup (Performance Optimization B) 1407 FromScopeIndex index = getFromScopeIndex(scope); 1408 if (index == null) { 1409 return null; 1410 } 1411 1412 String tablePrefix = objName.getTableToken() != null ? 1413 objName.getTableToken().toString() : null; 1414 1415 return index.findCandidateNamespace(tablePrefix); 1416 } 1417 1418 /** 1419 * Delta 1: Collect metadata from DDL statements in the batch. 1420 * 1421 * If no SQLEnv is provided, this method extracts table/column metadata 1422 * from CREATE TABLE and CREATE VIEW statements in the SQL batch and 1423 * creates a TSQLEnv for use during resolution. 1424 * 1425 * This enables standalone resolution of SQL batches that contain both 1426 * DDL and DML without requiring external metadata. 1427 */ 1428 private void collectBatchMetadata() { 1429 if (sqlStatements == null || sqlStatements.size() == 0) { 1430 return; 1431 } 1432 1433 EDbVendor vendor = config != null ? config.getVendor() : EDbVendor.dbvmssql; 1434 BatchMetadataCollector collector = new BatchMetadataCollector(sqlStatements, vendor); 1435 TSQLEnv batchEnv = collector.collect(); 1436 1437 if (batchEnv != null) { 1438 setSqlEnv(batchEnv); 1439 logDebug("Collected batch-local DDL metadata into TSQLEnv"); 1440 } 1441 } 1442 1443 /** 1444 * Delta 4: Track database context from USE/SET statements. 1445 * 1446 * Scans the statement list for USE DATABASE, USE SCHEMA, SET SCHEMA, 1447 * and similar statements, and applies the context to TSQLEnv for 1448 * proper resolution of unqualified table names. 
1449 */ 1450 private void trackDatabaseContext() { 1451 if (sqlStatements == null || sqlStatements.size() == 0) { 1452 return; 1453 } 1454 1455 DatabaseContextTracker tracker = new DatabaseContextTracker(); 1456 tracker.processStatements(sqlStatements); 1457 1458 // Apply context to TSQLEnv if any context was found 1459 if (tracker.hasContext()) { 1460 TSQLEnv env = getSqlEnv(); 1461 if (env != null) { 1462 tracker.applyDefaults(env); 1463 logDebug("Applied database context: " + tracker); 1464 } else { 1465 // Create a minimal TSQLEnv if none exists 1466 EDbVendor vendor = config != null ? config.getVendor() : EDbVendor.dbvmssql; 1467 try { 1468 env = new TSQLEnv(vendor) { 1469 @Override 1470 public void initSQLEnv() { 1471 // Minimal initialization 1472 } 1473 }; 1474 tracker.applyDefaults(env); 1475 setSqlEnv(env); 1476 logDebug("Created minimal TSQLEnv with database context: " + tracker); 1477 } catch (Exception e) { 1478 // TSQLEnv creation failed - context will not be applied 1479 logDebug("Failed to create TSQLEnv for database context: " + e.getMessage()); 1480 } 1481 } 1482 } 1483 } 1484 1485 /** 1486 * Build the global scope 1487 */ 1488 private void buildGlobalScope() { 1489 logDebug("Building global scope"); 1490 1491 // Get SQLEnv and vendor for qualified name resolution 1492 TSQLEnv sqlEnv = globalContext != null ? 
globalContext.getSqlEnv() : null; 1493 EDbVendor vendor = EDbVendor.dbvoracle; // Default 1494 1495 // Try to get vendor from statements 1496 if (sqlStatements != null && sqlStatements.size() > 0) { 1497 vendor = sqlStatements.get(0).dbvendor; 1498 } 1499 1500 // Create global scope with sqlEnv and vendor for proper qualified name resolution 1501 globalScope = new GlobalScope(globalContext, config.getNameMatcher(), sqlEnv, vendor); 1502 1503 logDebug("GlobalScope created with defaults: catalog=" + 1504 globalScope.getDefaultCatalog() + ", schema=" + globalScope.getDefaultSchema()); 1505 } 1506 1507 /** 1508 * Process a single statement 1509 */ 1510 private void processStatement(Object statement) { 1511 if (statement instanceof TSelectSqlStatement) { 1512 processSelectStatement((TSelectSqlStatement) statement); 1513 } 1514 // TODO: Add support for INSERT, UPDATE, DELETE, etc. 1515 } 1516 1517 /** 1518 * Process a SELECT statement 1519 */ 1520 private void processSelectStatement(TSelectSqlStatement select) { 1521 processSelectStatement(select, globalScope); 1522 } 1523 1524 /** 1525 * Process a SELECT statement with a specific parent scope. 1526 * This is used for recursive processing of CTE subqueries. 
1527 */ 1528 private void processSelectStatement(TSelectSqlStatement select, IScope givenParentScope) { 1529 logDebug("Processing SELECT statement"); 1530 1531 // Create SELECT scope (will be child of CTE scope if CTEs exist, otherwise child of given parent scope) 1532 IScope parentScope = givenParentScope; 1533 1534 // Process CTEs (WITH clause) if present 1535 CTEScope cteScope = null; 1536 if (select.getCteList() != null && select.getCteList().size() > 0) { 1537 cteScope = processCTEs(select.getCteList(), givenParentScope); 1538 parentScope = cteScope; // CTEs become parent of SELECT 1539 } 1540 1541 SelectScope selectScope = new SelectScope(parentScope, select); 1542 1543 // Process FROM clause 1544 if (select.tables != null && select.tables.size() > 0) { 1545 FromScope fromScope = processFromClause(select, selectScope); 1546 selectScope.setFromScope(fromScope); 1547 } 1548 1549 // Process column references in SELECT list 1550 if (select.getResultColumnList() != null) { 1551 List<TObjectName> selectListColumns = collectObjectNamesFromResultColumns(select.getResultColumnList()); 1552 processColumnReferences(selectListColumns, selectScope); 1553 } 1554 1555 // Process WHERE clause 1556 if (select.getWhereClause() != null && 1557 select.getWhereClause().getCondition() != null) { 1558 List<TObjectName> whereColumns = select.getWhereClause().getCondition().getColumnsInsideExpression(); 1559 processColumnReferences(whereColumns, selectScope); 1560 } 1561 1562 // Process GROUP BY clause 1563 GroupByScope groupByScope = null; 1564 if (select.getGroupByClause() != null) { 1565 groupByScope = processGroupBy(select, selectScope); 1566 } 1567 1568 // Process HAVING clause 1569 if (select.getGroupByClause() != null && 1570 select.getGroupByClause().getHavingClause() != null) { 1571 processHaving(select, selectScope, groupByScope); 1572 } 1573 1574 // Process ORDER BY clause 1575 if (select.getOrderbyClause() != null) { 1576 processOrderBy(select, selectScope); 1577 } 1578 } 
1579 1580 /** 1581 * Process FROM clause and build FROM scope 1582 */ 1583 private FromScope processFromClause(TSelectSqlStatement select, IScope parentScope) { 1584 FromScope fromScope = new FromScope(parentScope, select.tables); 1585 1586 // Process each relation (table or join) 1587 ArrayList<TTable> relations = select.getRelations(); 1588 if (relations != null) { 1589 for (TTable table : relations) { 1590 processTableOrJoin(table, fromScope); 1591 } 1592 } 1593 1594 return fromScope; 1595 } 1596 1597 /** 1598 * Recursively process a table or join expression and add to FROM scope 1599 */ 1600 private void processTableOrJoin(TTable table, FromScope fromScope) { 1601 if (table.getTableType() == ETableSource.join) { 1602 // This is a JOIN - recursively process left and right tables 1603 TJoinExpr joinExpr = table.getJoinExpr(); 1604 if (joinExpr != null) { 1605 logDebug("Processing JOIN: " + joinExpr.getJointype()); 1606 1607 // Recursively process left table 1608 TTable leftTable = joinExpr.getLeftTable(); 1609 if (leftTable != null) { 1610 processTableOrJoin(leftTable, fromScope); 1611 } 1612 1613 // Recursively process right table 1614 TTable rightTable = joinExpr.getRightTable(); 1615 if (rightTable != null) { 1616 processTableOrJoin(rightTable, fromScope); 1617 } 1618 1619 // TODO: Create JoinScope to handle nullable semantics 1620 // For now, we just add the base tables to FROM scope 1621 } 1622 } else { 1623 // This is a base table (objectname, subquery, etc.) 1624 INamespace namespace = createNamespaceForTable(table); 1625 1626 // Validate namespace (load metadata) 1627 namespace.validate(); 1628 1629 // Determine alias 1630 String alias = table.getAliasName() != null 1631 ? 
table.getAliasName() 1632 : table.getName(); 1633 1634 // Add to FROM scope 1635 fromScope.addChild(namespace, alias, false); 1636 1637 logDebug("Added table to FROM scope: " + alias); 1638 } 1639 } 1640 1641 /** 1642 * Process CTEs (WITH clause) and build CTE scope 1643 */ 1644 private CTEScope processCTEs(TCTEList cteList, IScope parentScope) { 1645 CTEScope cteScope = new CTEScope(parentScope, cteList); 1646 logDebug("Processing WITH clause with " + cteList.size() + " CTE(s)"); 1647 1648 // Process each CTE in order (later CTEs can reference earlier ones) 1649 for (int i = 0; i < cteList.size(); i++) { 1650 TCTE cte = cteList.getCTE(i); 1651 1652 // Get CTE name 1653 String cteName = cte.getTableName() != null ? cte.getTableName().toString() : null; 1654 if (cteName == null) { 1655 logDebug("Skipping CTE with null name"); 1656 continue; 1657 } 1658 1659 // Get CTE subquery 1660 TSelectSqlStatement cteSubquery = cte.getSubquery(); 1661 if (cteSubquery == null) { 1662 logDebug("Skipping CTE '" + cteName + "' with null subquery"); 1663 continue; 1664 } 1665 1666 // Create CTENamespace 1667 CTENamespace cteNamespace = new CTENamespace( 1668 cte, 1669 cteName, 1670 cteSubquery, 1671 config.getNameMatcher() 1672 ); 1673 1674 // Validate namespace (load column metadata from subquery) 1675 cteNamespace.validate(); 1676 1677 // Add to CTE scope (makes it visible to later CTEs and main query) 1678 cteScope.addCTE(cteName, cteNamespace); 1679 1680 logDebug("Added CTE to scope: " + cteName + 1681 " (columns=" + cteNamespace.getExplicitColumns().size() + 1682 ", recursive=" + cteNamespace.isRecursive() + ")"); 1683 1684 // Recursively process CTE subquery 1685 // This ensures that: 1686 // 1. Columns within the CTE are properly resolved 1687 // 2. Nested CTEs within this CTE are handled 1688 // 3. 
Later CTEs can reference this CTE's columns 1689 logDebug("Recursively processing CTE subquery: " + cteName); 1690 processSelectStatement(cteSubquery, cteScope); 1691 } 1692 1693 return cteScope; 1694 } 1695 1696 /** 1697 * Process GROUP BY clause and build GROUP BY scope 1698 */ 1699 private GroupByScope processGroupBy(TSelectSqlStatement select, SelectScope selectScope) { 1700 GroupByScope groupByScope = new GroupByScope(selectScope, select.getGroupByClause()); 1701 logDebug("Processing GROUP BY clause"); 1702 1703 // Set the FROM scope for column resolution 1704 if (selectScope.getFromScope() != null) { 1705 groupByScope.setFromScope(selectScope.getFromScope()); 1706 } 1707 1708 // Process column references in GROUP BY items 1709 if (select.getGroupByClause().getItems() != null) { 1710 for (int i = 0; i < select.getGroupByClause().getItems().size(); i++) { 1711 gudusoft.gsqlparser.nodes.TGroupByItem item = select.getGroupByClause().getItems().getGroupByItem(i); 1712 if (item.getExpr() != null) { 1713 List<TObjectName> groupByColumns = item.getExpr().getColumnsInsideExpression(); 1714 processColumnReferences(groupByColumns, groupByScope); 1715 } 1716 } 1717 } 1718 1719 return groupByScope; 1720 } 1721 1722 /** 1723 * Process HAVING clause and build HAVING scope 1724 */ 1725 private void processHaving(TSelectSqlStatement select, SelectScope selectScope, GroupByScope groupByScope) { 1726 logDebug("Processing HAVING clause"); 1727 1728 HavingScope havingScope = new HavingScope( 1729 selectScope, 1730 select.getGroupByClause().getHavingClause() 1731 ); 1732 1733 // Set GROUP BY scope for grouped column resolution 1734 if (groupByScope != null) { 1735 havingScope.setGroupByScope(groupByScope); 1736 } 1737 1738 // Set SELECT scope for alias resolution 1739 havingScope.setSelectScope(selectScope); 1740 1741 // Process column references in HAVING condition 1742 List<TObjectName> havingColumns = select.getGroupByClause().getHavingClause().getColumnsInsideExpression(); 
1743 processColumnReferences(havingColumns, havingScope); 1744 } 1745 1746 /** 1747 * Process ORDER BY clause and build ORDER BY scope 1748 */ 1749 private void processOrderBy(TSelectSqlStatement select, SelectScope selectScope) { 1750 logDebug("Processing ORDER BY clause"); 1751 1752 OrderByScope orderByScope = new OrderByScope(selectScope, select.getOrderbyClause()); 1753 1754 // Set SELECT scope for alias resolution 1755 orderByScope.setSelectScope(selectScope); 1756 1757 // Set FROM scope for direct column resolution (database-dependent) 1758 if (selectScope.getFromScope() != null) { 1759 orderByScope.setFromScope(selectScope.getFromScope()); 1760 } 1761 1762 // Process column references in ORDER BY items 1763 if (select.getOrderbyClause().getItems() != null) { 1764 for (int i = 0; i < select.getOrderbyClause().getItems().size(); i++) { 1765 gudusoft.gsqlparser.nodes.TOrderByItem item = select.getOrderbyClause().getItems().getOrderByItem(i); 1766 if (item.getSortKey() != null) { 1767 List<TObjectName> orderByColumns = item.getSortKey().getColumnsInsideExpression(); 1768 processColumnReferences(orderByColumns, orderByScope); 1769 } 1770 } 1771 } 1772 } 1773 1774 /** 1775 * Create appropriate namespace for a table 1776 */ 1777 private INamespace createNamespaceForTable(TTable table) { 1778 // Check if it's a subquery 1779 if (table.getSubquery() != null) { 1780 return new SubqueryNamespace( 1781 table.getSubquery(), 1782 table.getAliasName(), 1783 config.getNameMatcher() 1784 ); 1785 } 1786 1787 // Regular table - pass sqlEnv and vendor for qualified name resolution 1788 TSQLEnv sqlEnv = globalContext != null ? globalContext.getSqlEnv() : null; 1789 EDbVendor vendor = table.dbvendor != null ? 
table.dbvendor : EDbVendor.dbvoracle; 1790 return new TableNamespace(table, config.getNameMatcher(), sqlEnv, vendor); 1791 } 1792 1793 /** 1794 * Collect all TObjectName from TResultColumnList 1795 */ 1796 private List<TObjectName> collectObjectNamesFromResultColumns( 1797 gudusoft.gsqlparser.nodes.TResultColumnList resultColumns) { 1798 List<TObjectName> objNames = new ArrayList<>(); 1799 1800 for (int i = 0; i < resultColumns.size(); i++) { 1801 gudusoft.gsqlparser.nodes.TResultColumn rc = resultColumns.getResultColumn(i); 1802 if (rc.getExpr() != null) { 1803 // Get all column references from the expression 1804 List<TObjectName> exprColumns = rc.getExpr().getColumnsInsideExpression(); 1805 if (exprColumns != null) { 1806 objNames.addAll(exprColumns); 1807 } 1808 } 1809 } 1810 1811 return objNames; 1812 } 1813 1814 /** 1815 * Process column references (TObjectName list) 1816 */ 1817 private void processColumnReferences(List<TObjectName> objectNames, IScope scope) { 1818 if (objectNames == null) return; 1819 1820 for (TObjectName objName : objectNames) { 1821 // Record column-to-scope mapping for iterative resolution (Principle 1) 1822 columnToScopeMap.put(objName, scope); 1823 allColumnReferences.add(objName); 1824 1825 // Resolve the column reference 1826 nameResolver.resolve(objName, scope); 1827 1828 // Handle USING column priority for JOIN...USING syntax 1829 handleUsingColumnResolution(objName); 1830 1831 // Handle Teradata NAMED alias resolution 1832 handleTeradataNamedAliasResolution(objName); 1833 handleQualifyClauseAliasResolution(objName); 1834 } 1835 } 1836 1837 // Detailed legacy sync timing (for profiling) 1838 private static long globalTimeClearLinked = 0; 1839 private static long globalTimeFillAttributes = 0; 1840 private static long globalTimeSyncColumns = 0; 1841 private static long globalTimePopulateOrphans = 0; 1842 private static long globalTimeClearHints = 0; 1843 1844 /** 1845 * Get detailed legacy sync timing breakdown. 
1846 */ 1847 public static String getLegacySyncTimings() { 1848 long total = globalTimeClearLinked + globalTimeFillAttributes + globalTimeSyncColumns + globalTimePopulateOrphans + globalTimeClearHints; 1849 return String.format( 1850 "LegacySync Breakdown:\n" + 1851 " ClearLinkedColumns: %d ms (%.1f%%)\n" + 1852 " FillTableAttributes: %d ms (%.1f%%)\n" + 1853 " SyncColumnToLegacy: %d ms (%.1f%%)\n" + 1854 " PopulateOrphanColumns: %d ms (%.1f%%)\n" + 1855 " ClearSyntaxHints: %d ms (%.1f%%)\n" + 1856 " Total: %d ms", 1857 globalTimeClearLinked, total > 0 ? 100.0 * globalTimeClearLinked / total : 0, 1858 globalTimeFillAttributes, total > 0 ? 100.0 * globalTimeFillAttributes / total : 0, 1859 globalTimeSyncColumns, total > 0 ? 100.0 * globalTimeSyncColumns / total : 0, 1860 globalTimePopulateOrphans, total > 0 ? 100.0 * globalTimePopulateOrphans / total : 0, 1861 globalTimeClearHints, total > 0 ? 100.0 * globalTimeClearHints / total : 0, 1862 total); 1863 } 1864 1865 /** 1866 * Create cloned columns for star column tracing. 1867 * 1868 * <p>This is a CORE part of TSQLResolver2's name resolution. When a column traces 1869 * through a CTE or subquery with SELECT * to a physical table, we create a cloned 1870 * TObjectName with sourceTable pointing to the traced physical table. 1871 * 1872 * <p>Example: 1873 * <pre> 1874 * WITH cte AS (SELECT * FROM physical_table) 1875 * SELECT a FROM cte 1876 * </pre> 1877 * 1878 * <p>For column 'a' in the outer SELECT: 1879 * <ul> 1880 * <li>Original column: sourceTable = cte (immediate source)</li> 1881 * <li>Cloned column: sourceTable = physical_table (traced through star)</li> 1882 * </ul> 1883 * 1884 * <p>Both columns are added to allColumnReferences for complete lineage tracking. 1885 * This ensures the formatter can output both the immediate source and the traced 1886 * physical table when needed. 
1887 */ 1888 private void createTracedColumnClones() { 1889 // Collect clones to add (avoid ConcurrentModificationException) 1890 java.util.List<TObjectName> clonesToAdd = new java.util.ArrayList<>(); 1891 1892 for (TObjectName column : allColumnReferences) { 1893 // Skip star columns - they represent all columns from a table and shouldn't be cloned 1894 String colName = column.getColumnNameOnly(); 1895 if (colName != null && colName.equals("*")) { 1896 continue; 1897 } 1898 1899 // Skip columns without resolution 1900 gudusoft.gsqlparser.resolver2.model.ResolutionResult resolution = column.getResolution(); 1901 if (resolution == null || !resolution.isExactMatch()) { 1902 continue; 1903 } 1904 1905 gudusoft.gsqlparser.resolver2.model.ColumnSource source = resolution.getColumnSource(); 1906 if (source == null) { 1907 continue; 1908 } 1909 1910 TTable sourceTable = column.getSourceTable(); 1911 if (sourceTable == null) { 1912 continue; 1913 } 1914 1915 // Only process CTE or subquery columns 1916 if (!sourceTable.isCTEName() && sourceTable.getTableType() != ETableSource.subquery) { 1917 continue; 1918 } 1919 1920 // Get the traced physical table 1921 TTable finalTable = source.getFinalTable(); 1922 if (finalTable == null || finalTable == sourceTable) { 1923 continue; 1924 } 1925 1926 // Skip if finalTable is also a CTE or subquery 1927 if (finalTable.isCTEName() || finalTable.getTableType() == ETableSource.subquery) { 1928 continue; 1929 } 1930 1931 // Skip subquery columns when the column matches an explicit column in the subquery's 1932 // SELECT list. Cloning is only needed when tracing through star columns. 
1933 // For example, in "SELECT al1.COL1, al1.COL3 FROM (SELECT t1.COL1, t2.* FROM t1, t2) al1": 1934 // - al1.COL1 matches explicit "t1.COL1" -> don't clone (stays at subquery level) 1935 // - al1.COL3 doesn't match explicit column, must come from t2.* -> clone to t2 1936 if (sourceTable.getTableType() == ETableSource.subquery) { 1937 TSelectSqlStatement subquery = sourceTable.getSubquery(); 1938 if (subquery != null && subqueryHasExplicitColumn(subquery, colName)) { 1939 continue; 1940 } 1941 } 1942 1943 // Skip UNION scenarios - syncToLegacyStructures already handles linking to all 1944 // UNION branch tables via getAllFinalTables(). Creating clones would cause duplicates. 1945 java.util.List<TTable> allFinalTables = source.getAllFinalTables(); 1946 if (allFinalTables != null && allFinalTables.size() > 1) { 1947 continue; 1948 } 1949 1950 // Skip UNQUALIFIED join condition columns - they should not be traced to the source 1951 // subquery's underlying table via star column expansion. 1952 // This is particularly important for MERGE ON clause columns which may 1953 // belong to the target table rather than the source subquery. 1954 // QUALIFIED columns (like S.id) should still be traced as they explicitly reference 1955 // the source subquery. 1956 // Note: We check location only because ownStmt may be null for unresolved columns. 
1957 if (column.getLocation() == ESqlClause.joinCondition 1958 && (column.getTableString() == null || column.getTableString().isEmpty())) { 1959 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 1960 logInfo("createTracedColumnClones: Skipping unqualified join condition column " + column.toString() + 1961 " - should not be traced to subquery's underlying table"); 1962 } 1963 continue; 1964 } 1965 1966 // Check if a column with same name and same finalTable already exists 1967 // (colName was already extracted above when checking for star columns) 1968 boolean alreadyExists = false; 1969 for (TObjectName existing : allColumnReferences) { 1970 if (existing.getSourceTable() == finalTable && 1971 colName != null && colName.equalsIgnoreCase(existing.getColumnNameOnly())) { 1972 alreadyExists = true; 1973 break; 1974 } 1975 } 1976 // Also check in clonesToAdd 1977 if (!alreadyExists) { 1978 for (TObjectName clone : clonesToAdd) { 1979 if (clone.getSourceTable() == finalTable && 1980 colName != null && colName.equalsIgnoreCase(clone.getColumnNameOnly())) { 1981 alreadyExists = true; 1982 break; 1983 } 1984 } 1985 } 1986 1987 if (!alreadyExists) { 1988 // Clone the column and set sourceTable to the traced physical table 1989 TObjectName clonedColumn = column.clone(); 1990 clonedColumn.setSourceTable(finalTable); 1991 clonesToAdd.add(clonedColumn); 1992 1993 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 1994 logInfo("createTracedColumnClones: Cloned column " + column.toString() + 1995 " with sourceTable traced from " + sourceTable.getTableName() + 1996 " to physical table " + finalTable.getTableName()); 1997 } 1998 } 1999 } 2000 2001 // Add all clones to allColumnReferences (local copy in TSQLResolver2) 2002 allColumnReferences.addAll(clonesToAdd); 2003 2004 // Also add to scopeBuildResult so consumers using scopeBuildResult.getAllColumnReferences() 2005 // (like TestGetTableColumn2 for star column expansion tests) can see the clones 2006 if (scopeBuildResult != null && 
!clonesToAdd.isEmpty()) { 2007 scopeBuildResult.addColumnReferences(clonesToAdd); 2008 } 2009 2010 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && !clonesToAdd.isEmpty()) { 2011 logInfo("createTracedColumnClones: Created " + clonesToAdd.size() + " traced column clones"); 2012 } 2013 } 2014 2015 /** 2016 * Sync results to legacy structures for backward compatibility. 2017 * This populates: 2018 * - TTable.linkedColumns: columns resolved to this table 2019 * - TObjectName.sourceTable: already set in setResolution() 2020 * - TObjectName.linkedColumnDef: from ColumnSource.definitionNode 2021 * - TObjectName.sourceColumn: from ColumnSource.definitionNode (if TResultColumn) 2022 */ 2023 private void syncToLegacyStructures() { 2024 if (!config.isLegacyCompatibilityEnabled()) { 2025 logInfo("Legacy compatibility disabled, skipping sync"); 2026 return; 2027 } 2028 2029 for (int i = 0; i < sqlStatements.size(); i++) { 2030 } 2031 2032 logInfo("Syncing to legacy structures..."); 2033 2034 long phaseStart; 2035 2036 // Clear existing linkedColumns on all tables 2037 phaseStart = System.currentTimeMillis(); 2038 clearAllLinkedColumns(); 2039 2040 // Clear existing orphanColumns on all statements 2041 // These will be repopulated in Phase 4b based on TSQLResolver2 resolution 2042 for (int i = 0; i < sqlStatements.size(); i++) { 2043 clearOrphanColumnsRecursive(sqlStatements.get(i)); 2044 } 2045 globalTimeClearLinked += System.currentTimeMillis() - phaseStart; 2046 2047 // Phase 1: Fill TTable.getAttributes() for all tables 2048 // This uses the namespace data already collected during resolution 2049 phaseStart = System.currentTimeMillis(); 2050 Set<TTable> processedTables = new HashSet<>(); 2051 for (int i = 0; i < sqlStatements.size(); i++) { 2052 fillTableAttributesRecursive(sqlStatements.get(i), processedTables); 2053 } 2054 globalTimeFillAttributes += System.currentTimeMillis() - phaseStart; 2055 logInfo("Filled attributes for " + processedTables.size() + " tables"); 2056 2057 
// Phase 2: Iterate through all column references and sync to legacy structures 2058 phaseStart = System.currentTimeMillis(); 2059 int syncCount = 0; 2060 for (TObjectName column : allColumnReferences) { 2061 if (syncColumnToLegacy(column)) { 2062 syncCount++; 2063 } 2064 } 2065 globalTimeSyncColumns += System.currentTimeMillis() - phaseStart; 2066 2067 // Phase 3: Link CTAS target table columns 2068 // For CREATE TABLE AS SELECT, the SELECT list columns should be linked to the target table 2069 for (int i = 0; i < sqlStatements.size(); i++) { 2070 linkCTASTargetTableColumns(sqlStatements.get(i)); 2071 } 2072 2073 // Phase 4: Sync implicit database/schema from USE DATABASE/USE SCHEMA to AST 2074 // This enables TObjectName.getAnsiSchemaName() and getAnsiCatalogName() to work correctly 2075 syncImplicitDbSchemaToAST(); 2076 2077 // Phase 4b: Populate orphan columns 2078 // Columns with sourceTable=null (unresolved or ambiguous) should be added to 2079 // their containing statement's orphanColumns list. This enables TGetTableColumn 2080 // to report them as orphan columns (with linkOrphanColumnToFirstTable option). 2081 phaseStart = System.currentTimeMillis(); 2082 populateOrphanColumns(); 2083 globalTimePopulateOrphans += System.currentTimeMillis() - phaseStart; 2084 2085 // Phase 4c: Expand star columns using push-down inferred columns 2086 // For SELECT * and SELECT table.*, expand to individual columns based on: 2087 // 1. Inferred columns from the namespace (via push-down algorithm) 2088 // 2. This enables star column expansion without TSQLEnv metadata 2089 phaseStart = System.currentTimeMillis(); 2090 expandStarColumnsUsingPushDown(); 2091 long expandTime = System.currentTimeMillis() - phaseStart; 2092 logInfo("Star column expansion took " + expandTime + "ms"); 2093 2094 // Phase 5: Clear orphan column syntax hints for resolved columns 2095 // The old resolver adds "sphint" (syntax hint) warnings for columns that can't be resolved. 
2096 // TSQLResolver2 resolves these columns but doesn't clear the syntax hints. 2097 // This phase cleans up those hints to maintain compatibility with tests expecting no hints. 2098 phaseStart = System.currentTimeMillis(); 2099 clearOrphanColumnSyntaxHints(); 2100 globalTimeClearHints += System.currentTimeMillis() - phaseStart; 2101 2102 logInfo("Legacy sync complete: " + syncCount + "/" + allColumnReferences.size() + " columns synced"); 2103 } 2104 2105 /** 2106 * Link SELECT list columns to CTAS target table. 2107 * For CREATE TABLE AS SELECT statements, the output column names (aliases) 2108 * should be linked to the target table. The source column references 2109 * remain linked to their source tables. 2110 * 2111 * NOTE: For CTAS, the parser (TCreateTableSqlStatement.doParseStatement) already 2112 * correctly creates and links alias columns to the target table. The source columns 2113 * that were incorrectly added are filtered out in clearLinkedColumnsRecursive(). 2114 * This method now only handles cases where the parser didn't create alias columns. 2115 */ 2116 private void linkCTASTargetTableColumns(TCustomSqlStatement stmt) { 2117 if (stmt == null) return; 2118 2119 // CTAS columns are already handled by the parser (TCreateTableSqlStatement.doParseStatement) 2120 // and incorrectly added source columns are filtered in clearLinkedColumnsRecursive(). 2121 // No additional processing needed here for CTAS. 2122 2123 // Process nested statements (for other statement types that might need CTAS handling) 2124 for (int i = 0; i < stmt.getStatements().size(); i++) { 2125 linkCTASTargetTableColumns(stmt.getStatements().get(i)); 2126 } 2127 } 2128 2129 /** 2130 * Populate orphanColumns for unresolved columns. 2131 * Columns with sourceTable=null should be added to their containing statement's orphanColumns. 2132 * This enables TGetTableColumn to report these as "missed" columns. 
2133 */ 2134 private void populateOrphanColumns() { 2135 int addedCount = 0; 2136 for (TObjectName column : allColumnReferences) { 2137 if (column == null) continue; 2138 2139 // Skip non-column types that should not be in orphan columns 2140 EDbObjectType dbObjectType = column.getDbObjectType(); 2141 if (dbObjectType == EDbObjectType.column_alias // alias clause column definitions (e.g., AS x (numbers, animals)) 2142 || dbObjectType == EDbObjectType.variable // stored procedure variables 2143 || dbObjectType == EDbObjectType.parameter // stored procedure parameters 2144 || dbObjectType == EDbObjectType.cursor // cursors 2145 || dbObjectType == EDbObjectType.constant // constants 2146 || dbObjectType == EDbObjectType.label // labels 2147 ) { 2148 continue; 2149 } 2150 2151 // Check resolution status directly - ambiguous columns should be added to orphanColumns 2152 // Note: column.getColumnSource() returns the first candidate for ambiguous columns, 2153 // which would cause them to be incorrectly skipped. We need to check the resolution status first. 2154 // IMPORTANT: This check must come BEFORE the sourceTable check because Phase 1 (linkColumnToTable) 2155 // might have already set sourceTable during parsing, but TSQLResolver2 correctly marked it as ambiguous. 
2156 // NOTE: Skip star columns (*) since they are handled specially via sourceTableList 2157 ResolutionResult resolution = column.getResolution(); 2158 String columnName = column.getColumnNameOnly(); 2159 boolean isStarColumn = columnName != null && columnName.equals("*"); 2160 2161 if (resolution != null && resolution.getStatus() == ResolutionStatus.AMBIGUOUS && !isStarColumn) { 2162 // Ambiguous columns should be added to orphanColumns so they appear as "missed" 2163 // Clear sourceTable if it was set by Phase 1 (linkColumnToTable) so the column 2164 // doesn't also appear as resolved in the output 2165 if (column.getSourceTable() != null) { 2166 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2167 logInfo("populateOrphanColumns: Clearing sourceTable for AMBIGUOUS column: " + column.toString() 2168 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2169 + " was linked to " + column.getSourceTable().getTableName() 2170 + " with " + (resolution.getAmbiguousSource() != null ? 2171 resolution.getAmbiguousSource().getCandidateCount() : 0) + " candidates"); 2172 } 2173 column.setSourceTable(null); 2174 } 2175 // Fall through to add to orphanColumns 2176 } else { 2177 // Star columns (*) should NEVER be orphan columns - they represent all columns 2178 // from all tables and are handled specially via sourceTableList and linked 2179 // to tables in syncColumnToLegacy() which runs after this phase. 
2180 if (isStarColumn) { 2181 continue; 2182 } 2183 2184 // For non-ambiguous columns, skip if they have a sourceTable 2185 if (column.getSourceTable() != null) { 2186 continue; 2187 } 2188 2189 // Also skip columns that have a ColumnSource with a valid table 2190 ColumnSource source = column.getColumnSource(); 2191 if (source != null) { 2192 TTable finalTable = source.getFinalTable(); 2193 if (finalTable != null) { 2194 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2195 logInfo("populateOrphanColumns: Skipping column with ColumnSource: " + column.toString() 2196 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2197 + " -> resolved to " + finalTable.getTableName()); 2198 } 2199 continue; 2200 } 2201 // Also check overrideTable for derived table columns 2202 TTable overrideTable = source.getOverrideTable(); 2203 if (overrideTable != null) { 2204 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2205 logInfo("populateOrphanColumns: Skipping column with ColumnSource (override): " + column.toString() 2206 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2207 + " -> resolved to " + overrideTable.getTableName()); 2208 } 2209 continue; 2210 } 2211 } 2212 } 2213 2214 // Debug: log columns being added to orphan 2215 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2216 ColumnSource debugSource = column.getColumnSource(); 2217 logInfo("populateOrphanColumns: Adding orphan column: " + column.toString() 2218 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2219 + ", hasColumnSource=" + (debugSource != null) 2220 + (debugSource != null ? ", namespace=" + (debugSource.getSourceNamespace() != null ? 
2221 debugSource.getSourceNamespace().getClass().getSimpleName() : "null") : "")); 2222 } 2223 2224 // Find the containing statement for this column 2225 TCustomSqlStatement containingStmt = findContainingStatement(column); 2226 if (containingStmt != null) { 2227 // Set ownStmt so TSQLResolver2ResultFormatter can use getOwnStmt().getFirstPhysicalTable() 2228 // to link orphan columns to the first physical table (matching TGetTableColumn behavior) 2229 column.setOwnStmt(containingStmt); 2230 2231 TObjectNameList orphanColumns = containingStmt.getOrphanColumns(); 2232 if (orphanColumns != null && !containsColumn(orphanColumns, column)) { 2233 orphanColumns.addObjectName(column); 2234 addedCount++; 2235 } 2236 } else { 2237 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2238 logInfo("Could not find containing statement for orphan column: " + column.toString()); 2239 } 2240 } 2241 } 2242 logInfo("Populated " + addedCount + " orphan columns"); 2243 } 2244 2245 /** 2246 * Find the statement that contains a column reference. 2247 * First tries to use the scope information (more reliable), then falls back to AST traversal. 2248 * For PL/SQL blocks, searches for the innermost DML statement that contains the column. 
2249 */ 2250 private TCustomSqlStatement findContainingStatement(TObjectName column) { 2251 // First, try to use the scope information from columnToScopeMap 2252 // The scope's node is typically the containing statement 2253 IScope scope = columnToScopeMap.get(column); 2254 if (scope != null) { 2255 TParseTreeNode scopeNode = scope.getNode(); 2256 if (scopeNode instanceof TCustomSqlStatement) { 2257 TCustomSqlStatement stmt = (TCustomSqlStatement) scopeNode; 2258 // If the scope is a PL/SQL block or procedure, search for DML statements within it 2259 // that actually contain the column (by line number) 2260 if (isPLSQLBlockStatement(stmt)) { 2261 TCustomSqlStatement dmlStmt = findDMLStatementContaining(stmt, column); 2262 if (dmlStmt != null) { 2263 return dmlStmt; 2264 } 2265 } 2266 return stmt; 2267 } 2268 } 2269 2270 // Fallback: traverse up the AST to find the nearest TCustomSqlStatement parent 2271 TParseTreeNode node = column; 2272 while (node != null) { 2273 if (node instanceof TCustomSqlStatement) { 2274 return (TCustomSqlStatement) node; 2275 } 2276 node = node.getParentObjectName(); 2277 } 2278 2279 // Last resort: search all statements for a DML statement containing the column 2280 TCustomSqlStatement result = null; 2281 if (sqlStatements.size() > 0) { 2282 for (int i = 0; i < sqlStatements.size(); i++) { 2283 TCustomSqlStatement stmt = sqlStatements.get(i); 2284 TCustomSqlStatement dmlStmt = findDMLStatementContaining(stmt, column); 2285 if (dmlStmt != null) { 2286 result = dmlStmt; 2287 break; 2288 } 2289 } 2290 if (result == null) { 2291 result = sqlStatements.get(0); 2292 } 2293 } 2294 return result; 2295 } 2296 2297 /** 2298 * Check if a statement is a PL/SQL block type statement. 
2299 */ 2300 private boolean isPLSQLBlockStatement(TCustomSqlStatement stmt) { 2301 if (stmt == null) return false; 2302 String className = stmt.getClass().getSimpleName(); 2303 return className.startsWith("TPlsql") || className.startsWith("TPLSql") || 2304 className.contains("Block") || className.contains("Procedure") || 2305 className.contains("Function") || className.contains("Package"); 2306 } 2307 2308 /** 2309 * DML Statement Range for efficient line-based lookup. 2310 * Used by the DML index cache (Performance Optimization A). 2311 */ 2312 private static class DmlRange implements Comparable<DmlRange> { 2313 final long startLine; 2314 final long endLine; 2315 final TCustomSqlStatement stmt; 2316 2317 DmlRange(TCustomSqlStatement stmt) { 2318 this.stmt = stmt; 2319 this.startLine = stmt.getStartToken() != null ? stmt.getStartToken().lineNo : -1; 2320 this.endLine = stmt.getEndToken() != null ? stmt.getEndToken().lineNo : -1; 2321 } 2322 2323 boolean contains(long line) { 2324 return startLine >= 0 && startLine <= line && line <= endLine; 2325 } 2326 2327 // Sort by startLine for binary search 2328 @Override 2329 public int compareTo(DmlRange other) { 2330 return Long.compare(this.startLine, other.startLine); 2331 } 2332 } 2333 2334 /** 2335 * Cache for DML statement ranges per parent statement (Performance Optimization A). 2336 * Built lazily on first access, cleared at start of each resolve() call. 2337 * Uses IdentityHashMap because we need object identity, not equals(). 2338 */ 2339 private final Map<TCustomSqlStatement, List<DmlRange>> dmlIndexCache = new IdentityHashMap<>(); 2340 2341 /** 2342 * Build DML index for a parent statement. 
2343 */ 2344 private List<DmlRange> buildDmlIndex(TCustomSqlStatement parent) { 2345 final List<DmlRange> ranges = new ArrayList<>(); 2346 parent.acceptChildren(new TParseTreeVisitor() { 2347 @Override 2348 public void preVisit(TInsertSqlStatement stmt) { 2349 ranges.add(new DmlRange(stmt)); 2350 } 2351 @Override 2352 public void preVisit(TUpdateSqlStatement stmt) { 2353 ranges.add(new DmlRange(stmt)); 2354 } 2355 @Override 2356 public void preVisit(TDeleteSqlStatement stmt) { 2357 ranges.add(new DmlRange(stmt)); 2358 } 2359 @Override 2360 public void preVisit(TSelectSqlStatement stmt) { 2361 ranges.add(new DmlRange(stmt)); 2362 } 2363 }); 2364 // Sort by startLine for efficient lookup 2365 java.util.Collections.sort(ranges); 2366 return ranges; 2367 } 2368 2369 /** 2370 * Get or build the DML index for a parent statement (Performance Optimization A). 2371 */ 2372 private List<DmlRange> getDmlIndex(TCustomSqlStatement parent) { 2373 return dmlIndexCache.computeIfAbsent(parent, this::buildDmlIndex); 2374 } 2375 2376 /** 2377 * Find the innermost DML statement (INSERT/UPDATE/DELETE/SELECT) within a parent statement 2378 * that contains the given column reference (by line number range). 2379 * Uses cached DML index for O(log N) lookup instead of O(N) traversal. 
2380 */ 2381 private TCustomSqlStatement findDMLStatementContaining(TCustomSqlStatement parent, TObjectName column) { 2382 if (parent == null || column == null) return null; 2383 2384 long columnLine = column.getLineNo(); 2385 TCustomSqlStatement result = null; 2386 2387 // Use cached DML index (Performance Optimization A) 2388 List<DmlRange> ranges = getDmlIndex(parent); 2389 2390 // Find all DML statements that contain the column by line number 2391 // Need to check all ranges that could contain the column (can't use pure binary search 2392 // because ranges can overlap and we want the innermost one) 2393 for (DmlRange range : ranges) { 2394 // Optimization: if startLine > columnLine, no more ranges can contain it 2395 if (range.startLine > columnLine) { 2396 break; 2397 } 2398 if (range.contains(columnLine)) { 2399 // Found a matching DML statement - prefer the innermost one (later startLine) 2400 if (result == null || 2401 (range.startLine >= result.getStartToken().lineNo)) { 2402 result = range.stmt; 2403 } 2404 } 2405 } 2406 2407 return result; 2408 } 2409 2410 /** 2411 * Sync implicit database/schema from USE DATABASE/USE SCHEMA statements to AST. 2412 * This enables TObjectName.getAnsiSchemaName() and getAnsiCatalogName() to work correctly 2413 * for unqualified object names. 2414 * 2415 * This is similar to what TDatabaseObjectResolver does in the legacy resolver: 2416 * it visits all TObjectName nodes and sets implicitDatabaseName/implicitSchemaName 2417 * based on the current database/schema context. 
2418 */ 2419 private void syncImplicitDbSchemaToAST() { 2420 // Get the tracked database context 2421 TSQLEnv env = getSqlEnv(); 2422 if (env == null) { 2423 return; 2424 } 2425 2426 String defaultCatalog = env.getDefaultCatalogName(); 2427 String defaultSchema = env.getDefaultSchemaName(); 2428 2429 // If no defaults are set, nothing to sync 2430 if ((defaultCatalog == null || defaultCatalog.isEmpty()) && 2431 (defaultSchema == null || defaultSchema.isEmpty())) { 2432 return; 2433 } 2434 2435 logDebug("Syncing implicit DB/schema to AST: catalog=" + defaultCatalog + ", schema=" + defaultSchema); 2436 2437 // Visit all statements and set implicit names on TObjectName nodes 2438 for (int i = 0; i < sqlStatements.size(); i++) { 2439 TCustomSqlStatement stmt = sqlStatements.get(i); 2440 if (stmt != null) { 2441 stmt.acceptChildren(new ImplicitDbSchemaVisitor(defaultCatalog, defaultSchema)); 2442 } 2443 } 2444 } 2445 2446 /** 2447 * Visitor to set implicit database/schema on TObjectName nodes. 
2448 */ 2449 private static class ImplicitDbSchemaVisitor extends TParseTreeVisitor { 2450 private final String defaultCatalog; 2451 private final String defaultSchema; 2452 2453 public ImplicitDbSchemaVisitor(String defaultCatalog, String defaultSchema) { 2454 this.defaultCatalog = defaultCatalog; 2455 this.defaultSchema = defaultSchema; 2456 } 2457 2458 @Override 2459 public void preVisit(TObjectName node) { 2460 if (node == null) return; 2461 2462 // Skip column objects - they don't need implicit DB/schema 2463 if (node.getDbObjectType() == EDbObjectType.column) return; 2464 2465 // Set default database name if not qualified 2466 if (defaultCatalog != null && !defaultCatalog.isEmpty() && node.getDatabaseToken() == null) { 2467 node.setImplictDatabaseName(defaultCatalog); 2468 } 2469 2470 // Set default schema name if not qualified 2471 if (defaultSchema != null && !defaultSchema.isEmpty() && node.getSchemaToken() == null) { 2472 node.setImplictSchemaName(defaultSchema); 2473 } 2474 } 2475 } 2476 2477 /** 2478 * Selectively clear orphan column syntax hints (sphint) based on TSQLResolver2 resolution. 2479 * 2480 * Phase 1 (linkColumnToTable during parsing) adds sphint hints for columns it can't resolve. 2481 * TSQLResolver2 should: 2482 * 1. KEEP sphint hints for columns that are in allColumnReferences with NOT_FOUND/AMBIGUOUS status 2483 * (these are genuinely orphan/ambiguous columns) 2484 * 2. CLEAR sphint hints for all other columns: 2485 * - Columns successfully resolved (EXACT_MATCH) 2486 * - Columns filtered out by ScopeBuilder (package constants, function keywords, etc.) 2487 * - Columns in contexts TSQLResolver2 doesn't collect (MERGE VALUES, etc.) 
2488 */ 2489 private void clearOrphanColumnSyntaxHints() { 2490 // Build a set of positions for columns that should KEEP their sphint hints 2491 // These are columns in allColumnReferences with NOT_FOUND or AMBIGUOUS status 2492 Set<String> orphanPositions = new HashSet<>(); 2493 2494 for (TObjectName col : allColumnReferences) { 2495 if (col == null) continue; 2496 gudusoft.gsqlparser.resolver2.model.ResolutionResult resolution = col.getResolution(); 2497 if (resolution != null) { 2498 ResolutionStatus status = resolution.getStatus(); 2499 // Only keep sphint for genuinely AMBIGUOUS columns 2500 // NOT_FOUND columns might be due to TSQLResolver2 scope issues (e.g., MERGE WHEN clause) 2501 // so we clear their sphint to match old resolver behavior 2502 if (status == ResolutionStatus.AMBIGUOUS) { 2503 TSourceToken startToken = col.getStartToken(); 2504 if (startToken != null) { 2505 String key = startToken.lineNo + ":" + startToken.columnNo; 2506 orphanPositions.add(key); 2507 } 2508 } 2509 } 2510 } 2511 2512 // Clear sphint hints for positions NOT in orphanPositions 2513 for (int i = 0; i < sqlStatements.size(); i++) { 2514 TCustomSqlStatement stmt = sqlStatements.get(i); 2515 if (stmt == null) continue; 2516 clearNonOrphanSphintHintsRecursive(stmt, orphanPositions); 2517 } 2518 } 2519 2520 /** 2521 * Recursively clear sphint hints except for genuinely orphan columns. 
2522 */ 2523 private void clearNonOrphanSphintHintsRecursive(TCustomSqlStatement stmt, Set<String> orphanPositions) { 2524 if (stmt == null) return; 2525 2526 // Clear sphint hints that are NOT for genuinely orphan columns 2527 if (stmt.getSyntaxHints() != null && stmt.getSyntaxHints().size() > 0) { 2528 for (int j = stmt.getSyntaxHints().size() - 1; j >= 0; j--) { 2529 TSyntaxError syntaxError = stmt.getSyntaxHints().get(j); 2530 if (syntaxError.errortype == EErrorType.sphint) { 2531 String key = syntaxError.lineNo + ":" + syntaxError.columnNo; 2532 if (!orphanPositions.contains(key)) { 2533 // This sphint is NOT for a genuinely orphan column - clear it 2534 stmt.getSyntaxHints().remove(j); 2535 logDebug("Cleared sphint at line " + syntaxError.lineNo); 2536 } 2537 // Keep sphint hints for genuinely orphan columns (in orphanPositions) 2538 } 2539 } 2540 } 2541 2542 // Note: orphanColumns is populated by populateOrphanColumns() in Phase 4b 2543 // DO NOT clear it here - TGetTableColumn relies on orphanColumns for 2544 // linkOrphanColumnToFirstTable functionality 2545 2546 // Process nested statements 2547 for (int k = 0; k < stmt.getStatements().size(); k++) { 2548 clearNonOrphanSphintHintsRecursive(stmt.getStatements().get(k), orphanPositions); 2549 } 2550 } 2551 2552 2553 2554 /** 2555 * Filter UNNEST table's linkedColumns to keep only legitimate columns. 2556 * Phase 1 (linkColumnToTable) may incorrectly link external variables to UNNEST 2557 * when UNNEST is the only table in scope. This method removes such incorrect links. 2558 * 2559 * Legitimate columns for UNNEST: 2560 * - Implicit column: the alias (e.g., "arry_pair" from "UNNEST(...) 
AS arry_pair") 2561 * - WITH OFFSET column (e.g., "pos" from "WITH OFFSET AS pos") 2562 * - Derived struct field columns (from UNNEST of STRUCT arrays) 2563 */ 2564 private void filterUnnestLinkedColumns(TTable unnestTable) { 2565 if (unnestTable == null || unnestTable.getTableType() != ETableSource.unnest) { 2566 return; 2567 } 2568 2569 TObjectNameList linkedColumns = unnestTable.getLinkedColumns(); 2570 if (linkedColumns == null || linkedColumns.size() == 0) { 2571 return; 2572 } 2573 2574 // Build set of legitimate column names 2575 java.util.Set<String> legitimateNames = new java.util.HashSet<>(); 2576 2577 // 1. Implicit column (alias name) 2578 String aliasName = unnestTable.getAliasName(); 2579 if (aliasName != null && !aliasName.isEmpty()) { 2580 legitimateNames.add(aliasName.toUpperCase()); 2581 } 2582 2583 // 2. WITH OFFSET column 2584 TUnnestClause unnestClause = unnestTable.getUnnestClause(); 2585 if (unnestClause != null && unnestClause.getWithOffset() != null) { 2586 if (unnestClause.getWithOffsetAlais() != null && 2587 unnestClause.getWithOffsetAlais().getAliasName() != null) { 2588 legitimateNames.add(unnestClause.getWithOffsetAlais().getAliasName().toString().toUpperCase()); 2589 } else { 2590 legitimateNames.add("OFFSET"); 2591 } 2592 } 2593 2594 // 3. Derived struct field columns 2595 if (unnestClause != null && unnestClause.getDerivedColumnList() != null) { 2596 for (int i = 0; i < unnestClause.getDerivedColumnList().size(); i++) { 2597 TObjectName derivedCol = unnestClause.getDerivedColumnList().getObjectName(i); 2598 if (derivedCol != null) { 2599 legitimateNames.add(derivedCol.toString().toUpperCase()); 2600 } 2601 } 2602 } 2603 2604 // 4. Explicit alias columns (Presto/Trino syntax: UNNEST(...) 
AS t(col1, col2)) 2605 if (unnestTable.getAliasClause() != null && 2606 unnestTable.getAliasClause().getColumns() != null) { 2607 for (int i = 0; i < unnestTable.getAliasClause().getColumns().size(); i++) { 2608 TObjectName colName = unnestTable.getAliasClause().getColumns().getObjectName(i); 2609 if (colName != null) { 2610 legitimateNames.add(colName.toString().toUpperCase()); 2611 } 2612 } 2613 } 2614 2615 // Collect columns to keep 2616 java.util.List<TObjectName> toKeep = new java.util.ArrayList<>(); 2617 for (int i = 0; i < linkedColumns.size(); i++) { 2618 TObjectName col = linkedColumns.getObjectName(i); 2619 if (col != null) { 2620 String colName = col.getColumnNameOnly(); 2621 if (colName != null && legitimateNames.contains(colName.toUpperCase())) { 2622 toKeep.add(col); 2623 } 2624 } 2625 } 2626 2627 // Clear and re-add only legitimate columns 2628 linkedColumns.clear(); 2629 for (TObjectName col : toKeep) { 2630 linkedColumns.addObjectName(col); 2631 } 2632 } 2633 2634 /** 2635 * Clear linkedColumns on all tables in all statements. 2636 */ 2637 private void clearAllLinkedColumns() { 2638 // Use a set to track processed statements and avoid processing duplicates 2639 // This is important when processing subqueries within tables, as the same 2640 // subquery might be reachable from multiple paths 2641 java.util.Set<TCustomSqlStatement> processed = new java.util.HashSet<>(); 2642 for (int i = 0; i < sqlStatements.size(); i++) { 2643 clearLinkedColumnsRecursive(sqlStatements.get(i), processed); 2644 } 2645 } 2646 2647 /** 2648 * Recursively clear orphanColumns on statements. 2649 * These will be repopulated with genuinely unresolved columns in Phase 4b. 
2650 */ 2651 private void clearOrphanColumnsRecursive(TCustomSqlStatement stmt) { 2652 if (stmt == null) return; 2653 2654 if (stmt.getOrphanColumns() != null) { 2655 stmt.getOrphanColumns().clear(); 2656 } 2657 2658 // Process nested statements 2659 for (int i = 0; i < stmt.getStatements().size(); i++) { 2660 clearOrphanColumnsRecursive(stmt.getStatements().get(i)); 2661 } 2662 2663 // Also handle stored procedure/function body statements 2664 if (stmt instanceof gudusoft.gsqlparser.stmt.TStoredProcedureSqlStatement) { 2665 gudusoft.gsqlparser.stmt.TStoredProcedureSqlStatement sp = 2666 (gudusoft.gsqlparser.stmt.TStoredProcedureSqlStatement) stmt; 2667 for (int i = 0; i < sp.getBodyStatements().size(); i++) { 2668 clearOrphanColumnsRecursive(sp.getBodyStatements().get(i)); 2669 } 2670 } 2671 } 2672 2673 private void clearLinkedColumnsRecursive(TCustomSqlStatement stmt, java.util.Set<TCustomSqlStatement> processed) { 2674 if (stmt == null) return; 2675 2676 // Skip if already processed to avoid redundant work and potential infinite loops 2677 if (processed.contains(stmt)) { 2678 return; 2679 } 2680 processed.add(stmt); 2681 2682 // Skip DAX statements - they populate their own linkedColumns during parsing 2683 // via TDaxFunction.doParse() which calls psql.linkColumnToTable() directly. 2684 // TSQLResolver2's ScopeBuilder doesn't traverse DAX expressions, so we must 2685 // preserve the linkedColumns that DAX parsing already established. 2686 if (stmt instanceof TDaxStmt) { 2687 return; 2688 } 2689 2690 // Skip ALTER TABLE statements - they populate linkedColumns during parsing 2691 // via TAlterTableOption.doParse() which directly adds columns to the target table's 2692 // linkedColumns. TSQLResolver2's ScopeBuilder doesn't traverse these option nodes, 2693 // so we must preserve the linkedColumns that parsing already established. 
2694 if (stmt instanceof TAlterTableStatement) { 2695 return; 2696 } 2697 2698 // For CREATE TABLE statements, we need special handling: 2699 // - Regular CREATE TABLE (with column definitions): Preserve constraint columns 2700 // populated during TConstraint.doParse() 2701 // - CTAS (CREATE TABLE AS SELECT): Filter out source columns incorrectly added 2702 // to target table, but preserve the correctly created alias columns 2703 boolean isCreateTable = (stmt instanceof TCreateTableSqlStatement); 2704 if (isCreateTable) { 2705 TCreateTableSqlStatement ctas = (TCreateTableSqlStatement) stmt; 2706 boolean isCTAS = (ctas.getSubQuery() != null); 2707 // For CTAS, filter out source columns from target table's linkedColumns 2708 // The old resolver incorrectly adds source columns (from the SELECT) to the target table 2709 // Keep only columns whose sourceTable is the target table itself 2710 if (isCTAS && ctas.getTargetTable() != null) { 2711 TTable targetTable = ctas.getTargetTable(); 2712 TObjectNameList linkedColumns = targetTable.getLinkedColumns(); 2713 if (linkedColumns != null && linkedColumns.size() > 0) { 2714 // Collect columns to keep (those belonging to target table) 2715 java.util.List<TObjectName> toKeep = new java.util.ArrayList<>(); 2716 for (int i = 0; i < linkedColumns.size(); i++) { 2717 TObjectName col = linkedColumns.getObjectName(i); 2718 if (col != null && col.getSourceTable() == targetTable) { 2719 toKeep.add(col); 2720 } 2721 } 2722 // Clear and re-add only the columns to keep 2723 linkedColumns.clear(); 2724 for (TObjectName col : toKeep) { 2725 linkedColumns.addObjectName(col); 2726 } 2727 } 2728 } 2729 } 2730 2731 if (!isCreateTable && stmt.tables != null) { 2732 // Check if this statement contains a TD_UNPIVOT table 2733 // TD_UNPIVOT populates linkedColumns on its inner table during TTDUnpivot.doParse() 2734 // If we clear linkedColumns here, we lose those column references 2735 boolean hasTDUnpivot = false; 2736 for (int i = 0; i < 
stmt.tables.size(); i++) { 2737 TTable table = stmt.tables.getTable(i); 2738 if (table != null && table.getTableType() == ETableSource.td_unpivot) { 2739 hasTDUnpivot = true; 2740 break; 2741 } 2742 } 2743 2744 for (int i = 0; i < stmt.tables.size(); i++) { 2745 TTable table = stmt.tables.getTable(i); 2746 if (table != null && table.getLinkedColumns() != null) { 2747 // For UNNEST tables, filter out incorrectly linked columns from Phase 1. 2748 // Phase 1 (linkColumnToTable) may have linked external variables to UNNEST 2749 // when it's the only table in scope. Keep only legitimate columns: 2750 // - Implicit column (the UNNEST alias, e.g., "arry_pair" from "UNNEST(...) AS arry_pair") 2751 // - WITH OFFSET column (e.g., "pos" from "WITH OFFSET AS pos") 2752 if (table.getTableType() == ETableSource.unnest) { 2753 filterUnnestLinkedColumns(table); 2754 continue; 2755 } 2756 // Skip TD_UNPIVOT tables - they don't have their own columns but 2757 // TTDUnpivot.doParse() populates columns on the inner table 2758 if (table.getTableType() == ETableSource.td_unpivot) { 2759 continue; 2760 } 2761 // If this statement contains TD_UNPIVOT, skip clearing all tables 2762 // because TD_UNPIVOT populates linkedColumns on inner tables 2763 if (hasTDUnpivot) { 2764 continue; 2765 } 2766 table.getLinkedColumns().clear(); 2767 } 2768 } 2769 } 2770 2771 // Skip recursive processing if this statement contains TD_UNPIVOT 2772 // TD_UNPIVOT's inner table (in the ON clause) has columns populated during parsing 2773 // and those columns need to be preserved 2774 boolean hasTDUnpivot = false; 2775 if (stmt.tables != null) { 2776 for (int i = 0; i < stmt.tables.size(); i++) { 2777 TTable table = stmt.tables.getTable(i); 2778 if (table != null && table.getTableType() == ETableSource.td_unpivot) { 2779 hasTDUnpivot = true; 2780 break; 2781 } 2782 } 2783 } 2784 2785 if (!hasTDUnpivot) { 2786 for (int i = 0; i < stmt.getStatements().size(); i++) { 2787 
clearLinkedColumnsRecursive(stmt.getStatements().get(i), processed); 2788 } 2789 2790 // Also process subqueries within tables - these are NOT in getStatements() 2791 // but are accessed via table.getSubquery() 2792 if (stmt.tables != null) { 2793 for (int i = 0; i < stmt.tables.size(); i++) { 2794 TTable table = stmt.tables.getTable(i); 2795 if (table != null && table.getSubquery() != null) { 2796 clearLinkedColumnsRecursive(table.getSubquery(), processed); 2797 } 2798 } 2799 } 2800 } 2801 } 2802 2803 /** 2804 * Recursively fill TTable.getAttributes() for all tables in a statement. 2805 * Uses namespace data already collected during name resolution. 2806 * 2807 * Processing order is important: 2808 * 1. Process CTEs first 2809 * 2. Process leaf tables (objectname, function, etc.) - not JOIN or subquery 2810 * 3. Process subqueries (recursively) 2811 * 4. Process JOIN tables last (they depend on child tables having attributes) 2812 */ 2813 private void fillTableAttributesRecursive(TCustomSqlStatement stmt, Set<TTable> processedTables) { 2814 if (stmt == null) return; 2815 2816 // Skip DAX statements - they use their own attribute/linkedColumn mechanism 2817 // established during TDaxFunction.doParse() parsing phase. 2818 if (stmt instanceof TDaxStmt) { 2819 return; 2820 } 2821 2822 // Skip ALTER TABLE statements - they use their own linkedColumn mechanism 2823 // established during TAlterTableOption.doParse() parsing phase. 2824 if (stmt instanceof TAlterTableStatement) { 2825 return; 2826 } 2827 2828 // Skip CREATE TABLE statements - they use their own linkedColumn mechanism 2829 // established during TConstraint.doParse() parsing phase. 
2830 if (stmt instanceof TCreateTableSqlStatement) { 2831 return; 2832 } 2833 2834 // Phase 1: Process CTE tables first 2835 if (stmt instanceof TSelectSqlStatement) { 2836 TSelectSqlStatement selectStmt = (TSelectSqlStatement) stmt; 2837 TCTEList cteList = selectStmt.getCteList(); 2838 if (cteList != null) { 2839 for (int i = 0; i < cteList.size(); i++) { 2840 TCTE cte = cteList.getCTE(i); 2841 if (cte != null && cte.getSubquery() != null) { 2842 fillTableAttributesRecursive(cte.getSubquery(), processedTables); 2843 } 2844 } 2845 } 2846 } 2847 2848 // Collect tables by type for proper processing order 2849 List<TTable> leafTables = new ArrayList<>(); 2850 List<TTable> subqueryTables = new ArrayList<>(); 2851 List<TTable> joinTables = new ArrayList<>(); 2852 2853 // First, collect from stmt.tables 2854 if (stmt.tables != null) { 2855 for (int i = 0; i < stmt.tables.size(); i++) { 2856 TTable table = stmt.tables.getTable(i); 2857 if (table == null || processedTables.contains(table)) continue; 2858 2859 switch (table.getTableType()) { 2860 case join: 2861 joinTables.add(table); 2862 // Also collect nested tables within the join 2863 collectNestedJoinTables(table, leafTables, subqueryTables, joinTables, processedTables); 2864 break; 2865 case subquery: 2866 subqueryTables.add(table); 2867 break; 2868 default: 2869 leafTables.add(table); 2870 break; 2871 } 2872 } 2873 } 2874 2875 // Also collect from getRelations() - JOIN tables are often stored there 2876 if (stmt.getRelations() != null) { 2877 for (int i = 0; i < stmt.getRelations().size(); i++) { 2878 IRelation rel = stmt.getRelations().get(i); 2879 if (!(rel instanceof TTable)) continue; 2880 TTable table = (TTable) rel; 2881 if (processedTables.contains(table)) continue; 2882 2883 if (table.getTableType() == ETableSource.join) { 2884 if (!joinTables.contains(table)) { 2885 joinTables.add(table); 2886 // Also collect nested tables within the join 2887 collectNestedJoinTables(table, leafTables, subqueryTables, 
joinTables, processedTables); 2888 } 2889 } 2890 } 2891 } 2892 2893 // Phase 2: Process leaf tables first (objectname, function, xml, etc.) 2894 for (TTable table : leafTables) { 2895 if (!processedTables.contains(table)) { 2896 fillTableAttributes(table, processedTables, stmt); 2897 processedTables.add(table); 2898 } 2899 } 2900 2901 // Phase 3: Process subqueries (recursively process their contents first) 2902 for (TTable table : subqueryTables) { 2903 if (!processedTables.contains(table)) { 2904 if (table.getSubquery() != null) { 2905 fillTableAttributesRecursive(table.getSubquery(), processedTables); 2906 } 2907 fillTableAttributes(table, processedTables, stmt); 2908 processedTables.add(table); 2909 } 2910 } 2911 2912 // Phase 4: Process JOIN tables last (they need child tables to have attributes) 2913 for (TTable table : joinTables) { 2914 if (!processedTables.contains(table)) { 2915 fillTableAttributes(table, processedTables, stmt); 2916 processedTables.add(table); 2917 } 2918 } 2919 2920 // Process nested statements 2921 for (int i = 0; i < stmt.getStatements().size(); i++) { 2922 fillTableAttributesRecursive(stmt.getStatements().get(i), processedTables); 2923 } 2924 } 2925 2926 /** 2927 * Collect nested tables within a JOIN expression. 2928 * This ensures all component tables are processed before the JOIN itself. 
2929 */ 2930 private void collectNestedJoinTables(TTable joinTable, 2931 List<TTable> leafTables, 2932 List<TTable> subqueryTables, 2933 List<TTable> joinTables, 2934 Set<TTable> processedTables) { 2935 if (joinTable == null || joinTable.getJoinExpr() == null) return; 2936 2937 TJoinExpr joinExpr = joinTable.getJoinExpr(); 2938 2939 // Process left table 2940 TTable leftTable = joinExpr.getLeftTable(); 2941 if (leftTable != null && !processedTables.contains(leftTable)) { 2942 switch (leftTable.getTableType()) { 2943 case join: 2944 joinTables.add(leftTable); 2945 collectNestedJoinTables(leftTable, leafTables, subqueryTables, joinTables, processedTables); 2946 break; 2947 case subquery: 2948 subqueryTables.add(leftTable); 2949 break; 2950 default: 2951 leafTables.add(leftTable); 2952 break; 2953 } 2954 } 2955 2956 // Process right table 2957 TTable rightTable = joinExpr.getRightTable(); 2958 if (rightTable != null && !processedTables.contains(rightTable)) { 2959 switch (rightTable.getTableType()) { 2960 case join: 2961 joinTables.add(rightTable); 2962 collectNestedJoinTables(rightTable, leafTables, subqueryTables, joinTables, processedTables); 2963 break; 2964 case subquery: 2965 subqueryTables.add(rightTable); 2966 break; 2967 default: 2968 leafTables.add(rightTable); 2969 break; 2970 } 2971 } 2972 } 2973 2974 /** 2975 * Fill TTable.getAttributes() for a single table using namespace data. 2976 * This converts the namespace's columnSources to TAttributeNode objects. 
2977 * 2978 * @param table The table to fill attributes for 2979 * @param processedTables Set of already processed tables to avoid duplicates 2980 * @param stmt The statement context (used for UNNEST to get the SELECT statement) 2981 */ 2982 private void fillTableAttributes(TTable table, Set<TTable> processedTables, TCustomSqlStatement stmt) { 2983 if (table == null) return; 2984 2985 // Clear existing attributes 2986 table.getAttributes().clear(); 2987 2988 String displayName = table.getDisplayName(true); 2989 if (displayName == null || displayName.isEmpty()) { 2990 displayName = table.getAliasName(); 2991 if (displayName == null || displayName.isEmpty()) { 2992 displayName = table.getName(); 2993 } 2994 } 2995 2996 // First, try to use existing namespace from ScopeBuildResult 2997 // Skip namespace lookup for UNNEST tables - they need special handling via initAttributesForUnnest 2998 INamespace existingNamespace = null; 2999 if (table.getTableType() != ETableSource.unnest) { 3000 existingNamespace = scopeBuildResult != null 3001 ? 
scopeBuildResult.getNamespaceForTable(table) 3002 : null; 3003 } 3004 3005 if (existingNamespace != null) { 3006 // Use existing namespace's column sources 3007 // Returns false if namespace has no real metadata (only inferred columns) 3008 if (fillAttributesFromNamespace(table, existingNamespace, displayName)) { 3009 return; 3010 } 3011 // Fall through to legacy logic if no real metadata 3012 } 3013 3014 // Fall back to type-specific handling if no namespace found 3015 switch (table.getTableType()) { 3016 case objectname: 3017 if (table.isCTEName()) { 3018 // CTE reference - use initAttributesFromCTE 3019 TCTE cte = table.getCTE(); 3020 if (cte != null) { 3021 table.initAttributesFromCTE(cte); 3022 } 3023 } else { 3024 // Physical table - create TableNamespace and extract columns 3025 fillPhysicalTableAttributes(table, displayName); 3026 } 3027 break; 3028 3029 case subquery: 3030 // Subquery - use initAttributesFromSubquery 3031 if (table.getSubquery() != null) { 3032 String prefix = ""; 3033 if (table.getAliasClause() != null) { 3034 prefix = table.getAliasClause().toString() + "."; 3035 } 3036 table.initAttributesFromSubquery(table.getSubquery(), prefix); 3037 } 3038 break; 3039 3040 case join: 3041 // JOIN - combine attributes from left and right tables 3042 // First, add USING columns to the left and right tables (if present) 3043 if (table.getJoinExpr() != null) { 3044 addUsingColumnsToTables(table.getJoinExpr()); 3045 // Then initialize the join expression's attributes (which pulls from left/right tables) 3046 table.getJoinExpr().initAttributes(0); 3047 } 3048 table.initAttributesForJoin(); 3049 break; 3050 3051 case function: 3052 // Table function 3053 table.initAttributeForTableFunction(); 3054 break; 3055 3056 case xmltable: 3057 // XML table 3058 table.initAttributeForXMLTable(); 3059 break; 3060 3061 case tableExpr: 3062 // Table expression 3063 TAttributeNode.addNodeToList( 3064 new TAttributeNode(displayName + ".*", table), 3065 
table.getAttributes() 3066 ); 3067 break; 3068 3069 case rowList: 3070 // Row list 3071 table.initAttributeForRowList(); 3072 break; 3073 3074 case unnest: 3075 // UNNEST - initialize attributes using the SELECT statement context 3076 if (stmt instanceof TSelectSqlStatement) { 3077 TSelectSqlStatement select = (TSelectSqlStatement) stmt; 3078 table.initAttributesForUnnest(getSqlEnv(), select); 3079 } 3080 break; 3081 3082 case pivoted_table: 3083 // PIVOT table 3084 table.initAttributesForPivotTable(); 3085 break; 3086 } 3087 } 3088 3089 /** 3090 * Fill table attributes from an existing namespace's column sources. 3091 * This uses the namespace data that was collected during ScopeBuilder traversal. 3092 * 3093 * @return true if attributes were successfully filled, false if should fall back to legacy logic 3094 */ 3095 private boolean fillAttributesFromNamespace(TTable table, INamespace namespace, String displayName) { 3096 // Ensure namespace is validated 3097 if (!namespace.isValidated()) { 3098 namespace.validate(); 3099 } 3100 3101 // For TableNamespace without actual metadata (only inferred columns), 3102 // return false to fall back to legacy logic which uses wildcards 3103 if (namespace instanceof TableNamespace) { 3104 TableNamespace tableNs = (TableNamespace) namespace; 3105 // Check if the namespace has actual metadata by seeing if there are any columns 3106 // with high confidence from metadata sources (not inferred) 3107 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 3108 boolean hasRealMetadata = false; 3109 for (ColumnSource source : columnSources.values()) { 3110 if (source.getConfidence() >= 1.0 && 3111 !("inferred_from_usage".equals(source.getEvidence()))) { 3112 hasRealMetadata = true; 3113 break; 3114 } 3115 } 3116 if (!hasRealMetadata) { 3117 // No real metadata, fall back to legacy logic with wildcards 3118 return false; 3119 } 3120 3121 // Has metadata - use namespace columns 3122 for (Map.Entry<String, ColumnSource> 
entry : columnSources.entrySet()) { 3123 String colName = entry.getKey(); 3124 ColumnSource source = entry.getValue(); 3125 // Only include columns with real metadata, not inferred ones 3126 if (source.getConfidence() >= 1.0 && 3127 !("inferred_from_usage".equals(source.getEvidence()))) { 3128 TAttributeNode.addNodeToList( 3129 new TAttributeNode(displayName + "." + colName, table), 3130 table.getAttributes() 3131 ); 3132 } 3133 } 3134 3135 // If no columns after filtering, add wildcard 3136 if (table.getAttributes().isEmpty()) { 3137 TAttributeNode.addNodeToList( 3138 new TAttributeNode(displayName + ".*", table), 3139 table.getAttributes() 3140 ); 3141 } 3142 return true; 3143 } 3144 3145 // For other namespace types (SubqueryNamespace, CTENamespace, etc.), 3146 // use all column sources 3147 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 3148 if (columnSources != null && !columnSources.isEmpty()) { 3149 for (Map.Entry<String, ColumnSource> entry : columnSources.entrySet()) { 3150 String colName = entry.getKey(); 3151 TAttributeNode.addNodeToList( 3152 new TAttributeNode(displayName + "." + colName, table), 3153 table.getAttributes() 3154 ); 3155 } 3156 } 3157 3158 // If no columns found, add wildcard attribute 3159 if (table.getAttributes().isEmpty()) { 3160 TAttributeNode.addNodeToList( 3161 new TAttributeNode(displayName + ".*", table), 3162 table.getAttributes() 3163 ); 3164 } 3165 return true; 3166 } 3167 3168 /** 3169 * Fill attributes for a physical table using TableNamespace. 3170 */ 3171 private void fillPhysicalTableAttributes(TTable table, String displayName) { 3172 // Create namespace for this table with sqlEnv and vendor for qualified name resolution 3173 TSQLEnv sqlEnv = globalContext != null ? globalContext.getSqlEnv() : null; 3174 EDbVendor vendor = table.dbvendor != null ? 
table.dbvendor : EDbVendor.dbvoracle; 3175 TableNamespace namespace = new TableNamespace(table, config.getNameMatcher(), sqlEnv, vendor); 3176 3177 // Validate to populate columnSources 3178 namespace.validate(); 3179 3180 // Convert columnSources to TAttributeNode 3181 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 3182 if (columnSources != null && !columnSources.isEmpty()) { 3183 for (Map.Entry<String, ColumnSource> entry : columnSources.entrySet()) { 3184 String colName = entry.getKey(); 3185 TAttributeNode.addNodeToList( 3186 new TAttributeNode(displayName + "." + colName, table), 3187 table.getAttributes() 3188 ); 3189 } 3190 } 3191 3192 // If no columns found from metadata, add wildcard attribute 3193 // (this allows any column to potentially match) 3194 if (table.getAttributes().isEmpty()) { 3195 // Add columns from linkedColumns if available 3196 if (table.getLinkedColumns() != null && table.getLinkedColumns().size() > 0) { 3197 for (TObjectName col : table.getLinkedColumns()) { 3198 if (col.getCandidateTables() != null && col.getCandidateTables().size() > 1) { 3199 continue; // Skip ambiguous columns 3200 } 3201 TAttributeNode.addNodeToList( 3202 new TAttributeNode(displayName + "." + col.getColumnNameOnly(), table), 3203 table.getAttributes() 3204 ); 3205 } 3206 } 3207 // Add wildcard attribute 3208 TAttributeNode.addNodeToList( 3209 new TAttributeNode(displayName + ".*", table), 3210 table.getAttributes() 3211 ); 3212 } 3213 } 3214 3215 /** 3216 * Add USING columns to the left and right tables in a JOIN expression. 3217 * USING columns should appear in both tables' attribute lists before the wildcard. 3218 * This method recursively handles nested JOINs. 
3219 */ 3220 private void addUsingColumnsToTables(TJoinExpr joinExpr) { 3221 if (joinExpr == null) return; 3222 3223 // Recursively handle nested joins 3224 TTable leftTable = joinExpr.getLeftTable(); 3225 TTable rightTable = joinExpr.getRightTable(); 3226 3227 if (leftTable != null && leftTable.getTableType() == ETableSource.join && leftTable.getJoinExpr() != null) { 3228 addUsingColumnsToTables(leftTable.getJoinExpr()); 3229 } 3230 if (rightTable != null && rightTable.getTableType() == ETableSource.join && rightTable.getJoinExpr() != null) { 3231 addUsingColumnsToTables(rightTable.getJoinExpr()); 3232 } 3233 3234 // Handle USING columns in this join 3235 gudusoft.gsqlparser.nodes.TObjectNameList usingColumns = joinExpr.getUsingColumns(); 3236 if (usingColumns == null || usingColumns.size() == 0) return; 3237 3238 // Add USING columns to both tables 3239 for (int i = 0; i < usingColumns.size(); i++) { 3240 TObjectName usingCol = usingColumns.getObjectName(i); 3241 if (usingCol == null) continue; 3242 String colName = usingCol.getColumnNameOnly(); 3243 3244 // Add to left table (insert before wildcard if possible) 3245 if (leftTable != null && leftTable.getTableType() != ETableSource.join) { 3246 addColumnAttributeBeforeWildcard(leftTable, colName); 3247 } 3248 3249 // Add to right table (insert before wildcard if possible) 3250 if (rightTable != null && rightTable.getTableType() != ETableSource.join) { 3251 addColumnAttributeBeforeWildcard(rightTable, colName); 3252 } 3253 } 3254 } 3255 3256 /** 3257 * Add a column attribute to a table, inserting before the wildcard (*) if present. 3258 * This ensures USING columns appear before the wildcard in the attribute list. 
3259 */ 3260 private void addColumnAttributeBeforeWildcard(TTable table, String columnName) { 3261 if (table == null || columnName == null) return; 3262 3263 String displayName = table.getDisplayName(true); 3264 if (displayName == null || displayName.isEmpty()) { 3265 displayName = table.getAliasName(); 3266 if (displayName == null || displayName.isEmpty()) { 3267 displayName = table.getName(); 3268 } 3269 } 3270 3271 String attrName = displayName + "." + columnName; 3272 3273 // Check if attribute already exists 3274 ArrayList<TAttributeNode> attrs = table.getAttributes(); 3275 for (TAttributeNode attr : attrs) { 3276 if (attr.getName().equalsIgnoreCase(attrName)) { 3277 return; // Already exists 3278 } 3279 } 3280 3281 // Find the wildcard position 3282 int wildcardIndex = -1; 3283 for (int i = 0; i < attrs.size(); i++) { 3284 if (attrs.get(i).getName().endsWith(".*")) { 3285 wildcardIndex = i; 3286 break; 3287 } 3288 } 3289 3290 // Insert before wildcard or add to end 3291 TAttributeNode newAttr = new TAttributeNode(attrName, table); 3292 if (wildcardIndex >= 0) { 3293 attrs.add(wildcardIndex, newAttr); 3294 } else { 3295 TAttributeNode.addNodeToList(newAttr, attrs); 3296 } 3297 } 3298 3299 /** 3300 * Sync a single column to legacy structures. 3301 * @return true if column was synced (had a sourceTable) 3302 */ 3303 private boolean syncColumnToLegacy(TObjectName column) { 3304 if (column == null) return false; 3305 3306 // Special handling for star columns (SELECT *) 3307 // Star columns represent ALL tables in the FROM clause and should be synced to ALL tables 3308 // in their sourceTableList, not just the first one. 
3309 String columnName = column.getColumnNameOnly(); 3310 if (columnName != null && columnName.equals("*")) { 3311 java.util.ArrayList<TTable> sourceTableList = column.getSourceTableList(); 3312 if (sourceTableList != null && sourceTableList.size() > 0) { 3313 boolean synced = false; 3314 for (TTable starTable : sourceTableList) { 3315 if (starTable == null) continue; 3316 // Skip subquery types - the star should be linked to physical tables 3317 if (starTable.getTableType() == ETableSource.subquery) continue; 3318 gudusoft.gsqlparser.nodes.TObjectNameList starLinkedColumns = starTable.getLinkedColumns(); 3319 if (starLinkedColumns != null && !containsColumn(starLinkedColumns, column)) { 3320 starLinkedColumns.addObjectName(column); 3321 synced = true; 3322 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3323 logInfo("syncColumnToLegacy: Synced star column to sourceTableList table: " 3324 + starTable.getTableName()); 3325 } 3326 } 3327 } 3328 return synced; 3329 } 3330 } 3331 3332 // Check if column is AMBIGUOUS - don't sync to legacy if it's ambiguous 3333 // Ambiguous columns should be added to orphanColumns, not linkedColumns 3334 // NOTE: Skip this check for star columns (*) since they are handled specially 3335 // via sourceTableList and should be linked to all tables in the FROM clause 3336 ResolutionResult resolution = column.getResolution(); 3337 if (resolution != null && resolution.getStatus() == ResolutionStatus.AMBIGUOUS) { 3338 // Don't treat star columns as ambiguous - they're supposed to match all tables 3339 if (columnName != null && columnName.equals("*")) { 3340 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3341 logInfo("syncColumnToLegacy: Star column has AMBIGUOUS status, proceeding with normal sync"); 3342 } 3343 // Fall through to normal processing 3344 } else { 3345 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3346 logInfo("syncColumnToLegacy: Skipping AMBIGUOUS column: " + column.toString() 3347 + " with " + (resolution.getAmbiguousSource() != 
null ? 3348 resolution.getAmbiguousSource().getCandidateCount() : 0) + " candidates"); 3349 } 3350 // Clear sourceTable if it was set by Phase 1 (linkColumnToTable) 3351 // This ensures the column will be treated as orphan by TGetTableColumn 3352 if (column.getSourceTable() != null) { 3353 column.setSourceTable(null); 3354 } 3355 return false; 3356 } 3357 } 3358 3359 TTable sourceTable = column.getSourceTable(); 3360 ColumnSource source = column.getColumnSource(); 3361 3362 // Handle columns resolved through PlsqlVariableNamespace 3363 // These are stored procedure variables/parameters - mark them as variables 3364 // so they won't be added to orphan columns 3365 if (source != null && source.getSourceNamespace() instanceof gudusoft.gsqlparser.resolver2.namespace.PlsqlVariableNamespace) { 3366 column.setDbObjectTypeDirectly(EDbObjectType.variable); 3367 // Variables don't need to be linked to tables 3368 return false; 3369 } 3370 3371 // Fix for subquery columns: When a column is EXPLICITLY QUALIFIED with a subquery alias 3372 // (e.g., mm.material_id), the old resolver Phase 1 may have incorrectly set sourceTable 3373 // to the physical table inside the subquery. TSQLResolver2 should correct this to point 3374 // to the subquery TTable itself. This preserves the intermediate layer for data lineage: 3375 // mm.material_id -> subquery mm -> physical table 3376 // 3377 // IMPORTANT: Only apply this correction for QUALIFIED columns. Unqualified columns 3378 // (like those inferred from star column expansion) should keep their physical table 3379 // sourceTable for proper data lineage tracing. 
3380 if (source != null && column.isQualified()) { 3381 INamespace ns = source.getSourceNamespace(); 3382 if (ns instanceof SubqueryNamespace) { 3383 TTable subqueryTable = ns.getSourceTable(); 3384 // If the subquery's TTable is different from the current sourceTable, 3385 // use the subquery's TTable to maintain proper semantic layering 3386 if (subqueryTable != null && subqueryTable != sourceTable) { 3387 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3388 logInfo("syncColumnToLegacy: Correcting sourceTable from " + 3389 (sourceTable != null ? sourceTable.getTableName() : "null") + 3390 " to subquery " + subqueryTable.getTableName() + " for qualified column " + column.toString()); 3391 } 3392 sourceTable = subqueryTable; 3393 column.setSourceTable(sourceTable); 3394 } 3395 } 3396 } 3397 3398 // If sourceTable is null, try to get it from ColumnSource 3399 // This handles columns resolved to derived tables (subqueries with aliases) 3400 // where TSQLResolver2 resolved via ColumnSource but didn't set sourceTable on TObjectName 3401 if (sourceTable == null && source != null) { 3402 // For alias columns (isColumnAlias) or passthroughs to aliases (getFinalColumnName != null), 3403 // prefer the immediate source table (subquery/CTE) over the traced physical table. 3404 // The alias name doesn't exist in the physical table, so linking with alias name is wrong. 
3405 boolean isAliasColumn = source.isColumnAlias() || source.getFinalColumnName() != null; 3406 if (isAliasColumn) { 3407 INamespace ns = source.getSourceNamespace(); 3408 if (ns != null) { 3409 TTable immediateTable = ns.getSourceTable(); 3410 if (immediateTable != null) { 3411 sourceTable = immediateTable; 3412 column.setSourceTable(sourceTable); 3413 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3414 logInfo("syncColumnToLegacy: Set sourceTable to immediate source for alias column " 3415 + column.toString() + " -> " + immediateTable.getTableName()); 3416 } 3417 } 3418 } 3419 } 3420 if (sourceTable == null) { 3421 TTable finalTable = source.getFinalTable(); 3422 if (finalTable != null) { 3423 sourceTable = finalTable; 3424 column.setSourceTable(sourceTable); 3425 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3426 logInfo("syncColumnToLegacy: Set sourceTable from ColumnSource.getFinalTable() for " 3427 + column.toString() + " -> " + finalTable.getTableName()); 3428 } 3429 } else { 3430 // Try getAllFinalTables() - this may succeed when getFinalTable() returns null 3431 // For example, columns inferred through star push-down may have overrideTable set 3432 // which getAllFinalTables() will return as a single-element list 3433 java.util.List<TTable> allFinalTables = source.getAllFinalTables(); 3434 if (allFinalTables != null && !allFinalTables.isEmpty()) { 3435 // Use the first non-subquery table from allFinalTables 3436 for (TTable candidateTable : allFinalTables) { 3437 if (candidateTable != null && candidateTable.getTableType() != ETableSource.subquery) { 3438 sourceTable = candidateTable; 3439 column.setSourceTable(sourceTable); 3440 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3441 logInfo("syncColumnToLegacy: Set sourceTable from ColumnSource.getAllFinalTables() for " 3442 + column.toString() + " -> " + candidateTable.getTableName()); 3443 } 3444 break; 3445 } 3446 } 3447 } 3448 3449 // Fallback: try overrideTable for cases like derived tables in JOIN ON 
clauses 3450 if (sourceTable == null) { 3451 TTable overrideTable = source.getOverrideTable(); 3452 if (overrideTable != null) { 3453 sourceTable = overrideTable; 3454 column.setSourceTable(sourceTable); 3455 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3456 logInfo("syncColumnToLegacy: Set sourceTable from ColumnSource.getOverrideTable() for " 3457 + column.toString() + " -> " + overrideTable.getTableName()); 3458 } 3459 } 3460 } 3461 } 3462 } 3463 } 3464 3465 if (sourceTable == null) { 3466 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && source != null) { 3467 logInfo("syncColumnToLegacy: Column " + column.toString() 3468 + " has ColumnSource but no table. Namespace: " 3469 + (source.getSourceNamespace() != null ? source.getSourceNamespace().getClass().getSimpleName() : "null") 3470 + ", evidence: " + source.getEvidence()); 3471 } 3472 return false; 3473 } 3474 3475 // For struct-field access (e.g., customer.customer_id in BigQuery), 3476 // create a synthetic column representing the base column (e.g., "customer") 3477 // instead of using the original column which has the field name (e.g., "customer_id") 3478 if (source != null && source.isStructFieldAccess()) { 3479 String baseColumnName = source.getExposedName(); 3480 if (baseColumnName != null && !baseColumnName.isEmpty()) { 3481 // Create synthetic TObjectName for the base column 3482 EDbVendor vendor = config != null ? 
config.getVendor() : EDbVendor.dbvbigquery; 3483 TObjectName baseColumn = TObjectName.createObjectName( 3484 vendor, EDbObjectType.column, baseColumnName); 3485 baseColumn.setSourceTable(sourceTable); 3486 3487 // Add the base column to linkedColumns (avoid duplicates by name) 3488 gudusoft.gsqlparser.nodes.TObjectNameList linkedColumns = sourceTable.getLinkedColumns(); 3489 if (linkedColumns != null && !containsColumnByName(linkedColumns, baseColumnName)) { 3490 linkedColumns.addObjectName(baseColumn); 3491 } 3492 return true; // Skip adding the original struct-qualified column to linkedColumns. 3493 // DataFlowAnalyzer uses FieldPath from the original TObjectName to match 3494 // against the synthetic base column via getStructFieldFullName(). 3495 } 3496 } 3497 3498 // 1. Add to TTable.linkedColumns (avoid duplicates) 3499 gudusoft.gsqlparser.nodes.TObjectNameList linkedColumns = sourceTable.getLinkedColumns(); 3500 if (linkedColumns != null && !containsColumn(linkedColumns, column)) { 3501 linkedColumns.addObjectName(column); 3502 } 3503 3504 // 2. For UNION scenarios, also add to all final tables from UNION branches 3505 // This is critical for star column push-down tests that expect columns to be 3506 // linked to ALL tables in a UNION, not just the first one. 3507 if (source != null) { 3508 java.util.List<TTable> allFinalTables = source.getAllFinalTables(); 3509 if (allFinalTables != null && allFinalTables.size() > 1) { 3510 for (TTable unionTable : allFinalTables) { 3511 if (unionTable == null || unionTable == sourceTable) continue; 3512 // Skip subquery types - only link to physical tables 3513 if (unionTable.getTableType() == ETableSource.subquery) continue; 3514 gudusoft.gsqlparser.nodes.TObjectNameList unionLinkedColumns = unionTable.getLinkedColumns(); 3515 if (unionLinkedColumns != null && !containsColumn(unionLinkedColumns, column)) { 3516 unionLinkedColumns.addObjectName(column); 3517 } 3518 } 3519 } 3520 3521 // 2b. 
For CTE columns, also link to the CTE reference table 3522 // When a column is resolved through a CTE, it should be linked to both: 3523 // - The CTE reference table (immediate source) 3524 // - The underlying physical tables (final source) 3525 INamespace ns = source.getSourceNamespace(); 3526 if (ns instanceof gudusoft.gsqlparser.resolver2.namespace.CTENamespace) { 3527 gudusoft.gsqlparser.resolver2.namespace.CTENamespace cteNs = 3528 (gudusoft.gsqlparser.resolver2.namespace.CTENamespace) ns; 3529 TTable cteTable = cteNs.getReferencingTable(); 3530 if (cteTable != null && cteTable != sourceTable) { 3531 gudusoft.gsqlparser.nodes.TObjectNameList cteLinkedColumns = cteTable.getLinkedColumns(); 3532 if (cteLinkedColumns != null && !containsColumn(cteLinkedColumns, column)) { 3533 cteLinkedColumns.addObjectName(column); 3534 } 3535 } 3536 } 3537 3538 // 2c. For subquery columns, also link to the underlying physical tables 3539 // When sourceTable is a subquery (e.g., qualified column S.id from MERGE USING subquery), 3540 // TGetTableColumn needs the column to be linked to physical tables for output. 3541 // Use getFinalTable() to trace through to the ultimate physical table. 3542 // IMPORTANT: Only link if a column with the same name doesn't already exist - 3543 // this avoids duplicates when both outer and inner queries reference the same column. 3544 // EXCEPTION: Skip MERGE ON clause columns - they should not be linked to the source 3545 // subquery's underlying table because they may belong to the target table instead. 3546 if (sourceTable.getTableType() == ETableSource.subquery) { 3547 // Skip UNQUALIFIED join condition columns - they should not be traced to the source 3548 // subquery's underlying table via star column expansion. 3549 // This is particularly important for MERGE ON clause columns which may 3550 // belong to the target table rather than the source subquery. 
3551 // QUALIFIED columns (like S.id) should still be traced as they explicitly reference 3552 // the source subquery. 3553 // Note: We check location only because ownStmt may be null for unresolved columns. 3554 boolean isUnqualifiedJoinConditionColumn = (column.getLocation() == ESqlClause.joinCondition) 3555 && (column.getTableString() == null || column.getTableString().isEmpty()); 3556 if (isUnqualifiedJoinConditionColumn && TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3557 logInfo("syncColumnToLegacy: Skipping unqualified join condition column " + column.toString() + 3558 " - should not be traced to subquery's underlying table"); 3559 } 3560 3561 // Skip alias columns - the alias name doesn't exist in the physical table, 3562 // so linking an alias-named column to the physical table produces wrong output 3563 // (e.g., TestTableEmployee.name instead of TestTableEmployee.ename). 3564 // getFinalTable() traces through aliases to find the physical table, but the 3565 // column name is still the alias. Only non-alias columns should be linked. 3566 boolean isAliasColumnForLinking = source.isColumnAlias() || source.getFinalColumnName() != null; 3567 3568 if (!isUnqualifiedJoinConditionColumn && !isAliasColumnForLinking) { 3569 TTable finalTable = source.getFinalTable(); 3570 if (finalTable != null && finalTable != sourceTable && 3571 finalTable.getTableType() != ETableSource.subquery) { 3572 gudusoft.gsqlparser.nodes.TObjectNameList finalLinkedColumns = finalTable.getLinkedColumns(); 3573 if (finalLinkedColumns != null && !containsColumn(finalLinkedColumns, column) 3574 && !containsColumnByName(finalLinkedColumns, column.getColumnNameOnly())) { 3575 finalLinkedColumns.addObjectName(column); 3576 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3577 logInfo("syncColumnToLegacy: Also linked " + column.toString() + 3578 " to underlying physical table " + finalTable.getTableName()); 3579 } 3580 } 3581 } 3582 } 3583 } 3584 3585 } 3586 3587 // 3. 
Sync linkedColumnDef and sourceColumn from ColumnSource 3588 if (source != null) { 3589 Object defNode = source.getDefinitionNode(); 3590 3591 // Set linkedColumnDef if definition is a TColumnDefinition 3592 if (defNode instanceof gudusoft.gsqlparser.nodes.TColumnDefinition) { 3593 column.setLinkedColumnDef((gudusoft.gsqlparser.nodes.TColumnDefinition) defNode); 3594 } 3595 3596 // Set sourceColumn if definition is a TResultColumn 3597 // BUT skip for CTE explicit columns - these reference the CTE column name (e.g., "mgr_dept") 3598 // not the underlying SELECT column (e.g., "grp"). The CTE column is a TObjectName, 3599 // not a TResultColumn, so we cannot set it as sourceColumn. 3600 if (defNode instanceof TResultColumn) { 3601 String evidence = source.getEvidence(); 3602 boolean isCTEExplicitColumn = evidence != null && evidence.startsWith("cte_explicit_column"); 3603 if (!isCTEExplicitColumn) { 3604 column.setSourceColumn((TResultColumn) defNode); 3605 } 3606 } 3607 // Special case: for star-inferred columns, set sourceColumn to the star column 3608 // The definitionNode is intentionally null to avoid affecting formatter output, 3609 // but we still need to set sourceColumn for legacy API compatibility. 3610 // Use setSourceColumnOnly to avoid changing dbObjectType which affects filtering. 3611 else if (defNode == null && source.getEvidence() != null 3612 && source.getEvidence().contains("auto_inferred")) { 3613 // This is a star-inferred column - get the star column from the namespace 3614 INamespace namespace = source.getSourceNamespace(); 3615 if (namespace != null) { 3616 TResultColumn starColumn = namespace.getStarColumn(); 3617 if (starColumn != null) { 3618 column.setSourceColumnOnly(starColumn); 3619 } 3620 } 3621 } 3622 } 3623 3624 return true; 3625 } 3626 3627 /** 3628 * Check if a column already exists in the list (by identity). 
3629 */ 3630 private boolean containsColumn(gudusoft.gsqlparser.nodes.TObjectNameList list, TObjectName column) { 3631 for (int i = 0; i < list.size(); i++) { 3632 if (list.getObjectName(i) == column) { 3633 return true; 3634 } 3635 } 3636 return false; 3637 } 3638 3639 /** 3640 * Check if a column with the given name already exists in the list. 3641 * Used for struct-field access where we create synthetic columns. 3642 */ 3643 private boolean containsColumnByName(gudusoft.gsqlparser.nodes.TObjectNameList list, String columnName) { 3644 if (columnName == null) return false; 3645 // Normalize by stripping quotes for comparison 3646 String normalizedName = stripQuotes(columnName); 3647 for (int i = 0; i < list.size(); i++) { 3648 TObjectName col = list.getObjectName(i); 3649 if (col != null) { 3650 String existingName = stripQuotes(col.getColumnNameOnly()); 3651 if (normalizedName.equalsIgnoreCase(existingName)) { 3652 return true; 3653 } 3654 } 3655 } 3656 return false; 3657 } 3658 3659 /** 3660 * Strip leading/trailing quote characters from a string. 3661 */ 3662 private String stripQuotes(String s) { 3663 if (s == null) return null; 3664 if (s.length() >= 2) { 3665 char first = s.charAt(0); 3666 char last = s.charAt(s.length() - 1); 3667 if ((first == '"' && last == '"') || 3668 (first == '\'' && last == '\'') || 3669 (first == '`' && last == '`') || 3670 (first == '[' && last == ']')) { 3671 return s.substring(1, s.length() - 1); 3672 } 3673 } 3674 return s; 3675 } 3676 3677 /** 3678 * Check if a subquery SELECT statement has an explicit (non-star) column with the given name. 
3679 * This is used to determine whether to create traced column clones: 3680 * - If the column matches an explicit column in the subquery, don't clone (stays at subquery level) 3681 * - If the column doesn't match explicit columns (must come from star), clone to physical table 3682 * 3683 * @param subquery the SELECT statement to check 3684 * @param columnName the column name to look for (may have quotes) 3685 * @return true if the subquery has an explicit column matching the name 3686 */ 3687 private boolean subqueryHasExplicitColumn(TSelectSqlStatement subquery, String columnName) { 3688 if (subquery == null || columnName == null) { 3689 return false; 3690 } 3691 3692 // For combined queries (UNION/INTERSECT/EXCEPT), follow left chain iteratively 3693 TSelectSqlStatement current = subquery; 3694 while (current.isCombinedQuery()) { 3695 current = current.getLeftStmt(); 3696 if (current == null) { 3697 return false; 3698 } 3699 } 3700 subquery = current; 3701 3702 TResultColumnList resultColumns = subquery.getResultColumnList(); 3703 if (resultColumns == null) { 3704 return false; 3705 } 3706 3707 // Normalize the column name for comparison (strip quotes) 3708 String normalizedName = stripQuotes(columnName); 3709 3710 for (int i = 0; i < resultColumns.size(); i++) { 3711 TResultColumn rc = resultColumns.getResultColumn(i); 3712 if (rc == null) { 3713 continue; 3714 } 3715 3716 String colStr = rc.toString(); 3717 // Skip star columns - they're not explicit columns 3718 if (colStr != null && (colStr.equals("*") || colStr.endsWith(".*"))) { 3719 continue; 3720 } 3721 3722 // Get the effective column name (alias if present, otherwise the column name) 3723 String effectiveName = null; 3724 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 3725 effectiveName = rc.getAliasClause().getAliasName().toString(); 3726 } else if (rc.getExpr() != null && rc.getExpr().getObjectOperand() != null) { 3727 // For simple column references like "t1.COL1", 
get the column name 3728 effectiveName = rc.getExpr().getObjectOperand().getColumnNameOnly(); 3729 } 3730 3731 if (effectiveName != null) { 3732 String normalizedEffective = stripQuotes(effectiveName); 3733 if (normalizedName.equalsIgnoreCase(normalizedEffective)) { 3734 return true; 3735 } 3736 } 3737 } 3738 3739 return false; 3740 } 3741 3742 /** 3743 * Expand star columns using push-down inferred columns from namespaces. 3744 * 3745 * This is the core of the star column push-down algorithm: 3746 * 1. Find all star columns in SELECT lists 3747 * 2. For each star column, find its source namespace(s) 3748 * 3. Get inferred columns from the namespace (collected during resolution) 3749 * 4. Expand the star column by populating attributeNodesDerivedFromFromClause 3750 * 3751 * This enables star column expansion without TSQLEnv metadata by using 3752 * columns referenced in outer queries to infer what the star expands to. 3753 */ 3754 private void expandStarColumnsUsingPushDown() { 3755 int expandedCount = 0; 3756 Set<TCustomSqlStatement> processedStmts = new HashSet<>(); 3757 3758 // Track expanded star columns by their string representation for syncing 3759 Map<String, ArrayList<TAttributeNode>> expandedStarCols = new HashMap<>(); 3760 3761 // Process all statements recursively 3762 for (int i = 0; i < sqlStatements.size(); i++) { 3763 expandedCount += expandStarColumnsInStatement(sqlStatements.get(i), processedStmts, expandedStarCols); 3764 } 3765 3766 // Sync expanded attributes to column references in getAllColumnReferences() 3767 // The result column TObjectNames might be different instances than those collected 3768 // during scope building, so we need to copy the expanded attrs 3769 if (scopeBuildResult != null && !expandedStarCols.isEmpty()) { 3770 for (TObjectName colRef : scopeBuildResult.getAllColumnReferences()) { 3771 if (colRef == null) continue; 3772 String colStr = colRef.toString(); 3773 if (colStr == null || !colStr.endsWith("*")) continue; 3774 3775 
// Skip if already has expanded attrs 3776 ArrayList<TAttributeNode> existingAttrs = colRef.getAttributeNodesDerivedFromFromClause(); 3777 if (existingAttrs != null && !existingAttrs.isEmpty()) continue; 3778 3779 // Find matching expanded star column 3780 ArrayList<TAttributeNode> expandedAttrs = expandedStarCols.get(colStr); 3781 if (expandedAttrs != null && !expandedAttrs.isEmpty()) { 3782 // Copy the expanded attrs to this column reference 3783 for (TAttributeNode attr : expandedAttrs) { 3784 TAttributeNode.addNodeToList(attr, existingAttrs); 3785 } 3786 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3787 logInfo("Synced " + expandedAttrs.size() + " expanded attrs to column reference: " + colStr); 3788 } 3789 } 3790 } 3791 } 3792 3793 logInfo("Expanded star columns using push-down: " + expandedCount + " columns added"); 3794 } 3795 3796 /** 3797 * Recursively expand star columns in a statement and its nested statements. 3798 * Uses processedStmts to track ALL statements (not just SELECTs) to prevent infinite loops. 
3799 */ 3800 private int expandStarColumnsInStatement(TCustomSqlStatement stmt, Set<TCustomSqlStatement> processedStmts, 3801 Map<String, ArrayList<TAttributeNode>> expandedStarCols) { 3802 if (stmt == null) return 0; 3803 3804 // Cycle detection: skip if already processed this statement 3805 if (processedStmts.contains(stmt)) { 3806 return 0; 3807 } 3808 processedStmts.add(stmt); 3809 3810 int count = 0; 3811 3812 // Handle SELECT statements 3813 if (stmt instanceof TSelectSqlStatement) { 3814 TSelectSqlStatement select = (TSelectSqlStatement) stmt; 3815 count += expandStarColumnsInSelect(select, expandedStarCols); 3816 3817 // Handle UNION/INTERSECT/EXCEPT - iteratively collect all branches 3818 if (select.isCombinedQuery()) { 3819 Deque<TSelectSqlStatement> unionStack = new ArrayDeque<>(); 3820 if (select.getLeftStmt() != null) unionStack.push(select.getLeftStmt()); 3821 if (select.getRightStmt() != null) unionStack.push(select.getRightStmt()); 3822 while (!unionStack.isEmpty()) { 3823 TSelectSqlStatement branch = unionStack.pop(); 3824 if (branch == null || processedStmts.contains(branch)) continue; 3825 processedStmts.add(branch); 3826 count += expandStarColumnsInSelect(branch, expandedStarCols); 3827 if (branch.isCombinedQuery()) { 3828 if (branch.getLeftStmt() != null) unionStack.push(branch.getLeftStmt()); 3829 if (branch.getRightStmt() != null) unionStack.push(branch.getRightStmt()); 3830 } else { 3831 // Process tables with subqueries in this branch 3832 if (branch.tables != null) { 3833 for (int i = 0; i < branch.tables.size(); i++) { 3834 TTable table = branch.tables.getTable(i); 3835 if (table != null && table.getSubquery() != null) { 3836 count += expandStarColumnsInStatement(table.getSubquery(), processedStmts, expandedStarCols); 3837 } 3838 } 3839 } 3840 if (branch.getCteList() != null) { 3841 for (int i = 0; i < branch.getCteList().size(); i++) { 3842 TCTE cte = branch.getCteList().getCTE(i); 3843 if (cte != null && cte.getSubquery() != null) { 
3844 count += expandStarColumnsInStatement(cte.getSubquery(), processedStmts, expandedStarCols); 3845 } 3846 } 3847 } 3848 } 3849 } 3850 } 3851 } 3852 3853 // Handle MERGE statements specially - process the USING clause 3854 if (stmt instanceof gudusoft.gsqlparser.stmt.TMergeSqlStatement) { 3855 gudusoft.gsqlparser.stmt.TMergeSqlStatement merge = (gudusoft.gsqlparser.stmt.TMergeSqlStatement) stmt; 3856 TTable usingTable = merge.getUsingTable(); 3857 if (usingTable != null && usingTable.getSubquery() != null) { 3858 count += expandStarColumnsInStatement(usingTable.getSubquery(), processedStmts, expandedStarCols); 3859 } 3860 } 3861 3862 // Process nested statements 3863 if (stmt.getStatements() != null) { 3864 for (int i = 0; i < stmt.getStatements().size(); i++) { 3865 Object nested = stmt.getStatements().get(i); 3866 if (nested instanceof TCustomSqlStatement) { 3867 count += expandStarColumnsInStatement((TCustomSqlStatement) nested, processedStmts, expandedStarCols); 3868 } 3869 } 3870 } 3871 3872 // Process tables with subqueries 3873 if (stmt.tables != null) { 3874 for (int i = 0; i < stmt.tables.size(); i++) { 3875 TTable table = stmt.tables.getTable(i); 3876 if (table != null && table.getSubquery() != null) { 3877 count += expandStarColumnsInStatement(table.getSubquery(), processedStmts, expandedStarCols); 3878 } 3879 } 3880 } 3881 3882 // Process CTEs 3883 if (stmt.getCteList() != null) { 3884 for (int i = 0; i < stmt.getCteList().size(); i++) { 3885 TCTE cte = stmt.getCteList().getCTE(i); 3886 if (cte != null && cte.getSubquery() != null) { 3887 count += expandStarColumnsInStatement(cte.getSubquery(), processedStmts, expandedStarCols); 3888 } 3889 } 3890 } 3891 3892 return count; 3893 } 3894 3895 /** 3896 * Expand star columns in a SELECT statement's result column list. 
3897 */ 3898 private int expandStarColumnsInSelect(TSelectSqlStatement select, Map<String, ArrayList<TAttributeNode>> expandedStarCols) { 3899 if (select == null || select.getResultColumnList() == null) return 0; 3900 3901 int count = 0; 3902 TResultColumnList resultCols = select.getResultColumnList(); 3903 3904 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3905 logInfo("expandStarColumnsInSelect: Processing SELECT with " + resultCols.size() + " result columns"); 3906 } 3907 3908 for (int i = 0; i < resultCols.size(); i++) { 3909 TResultColumn rc = resultCols.getResultColumn(i); 3910 if (rc == null || rc.getExpr() == null) continue; 3911 3912 TObjectName objName = rc.getExpr().getObjectOperand(); 3913 if (objName == null) continue; 3914 3915 String colStr = objName.toString(); 3916 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && colStr != null) { 3917 logInfo("expandStarColumnsInSelect: Column " + i + ": " + colStr); 3918 } 3919 if (colStr == null || !colStr.endsWith("*")) continue; 3920 3921 // This is a star column - expand it 3922 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3923 logInfo("expandStarColumnsInSelect: Found star column: " + colStr); 3924 } 3925 count += expandSingleStarColumn(objName, select, colStr, rc); 3926 3927 // Track the expanded attrs for syncing to column references 3928 ArrayList<TAttributeNode> attrList = objName.getAttributeNodesDerivedFromFromClause(); 3929 if (attrList != null && !attrList.isEmpty()) { 3930 expandedStarCols.put(colStr, attrList); 3931 } 3932 } 3933 3934 return count; 3935 } 3936 3937 /** 3938 * Expand a single star column using push-down inferred columns. 
3939 * 3940 * @param starColumn The star column TObjectName (e.g., "*" or "src.*") 3941 * @param select The containing SELECT statement 3942 * @param colStr The string representation of the star column 3943 * @param resultColumn The TResultColumn containing the star (for EXCEPT column list) 3944 * @return Number of columns added 3945 */ 3946 private int expandSingleStarColumn(TObjectName starColumn, TSelectSqlStatement select, String colStr, TResultColumn resultColumn) { 3947 ArrayList<TAttributeNode> attrList = starColumn.getAttributeNodesDerivedFromFromClause(); 3948 3949 // Skip if already expanded 3950 if (!attrList.isEmpty()) { 3951 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3952 logInfo("expandSingleStarColumn: " + colStr + " already expanded with " + attrList.size() + " attrs"); 3953 } 3954 return 0; 3955 } 3956 3957 // Collect EXCEPT column names to exclude from expansion 3958 // (BigQuery: SELECT * EXCEPT (col1, col2) FROM ...) 3959 Set<String> exceptColumns = new HashSet<>(); 3960 if (resultColumn != null) { 3961 TObjectNameList exceptList = resultColumn.getExceptColumnList(); 3962 if (exceptList != null && exceptList.size() > 0) { 3963 for (int i = 0; i < exceptList.size(); i++) { 3964 TObjectName exceptCol = exceptList.getObjectName(i); 3965 if (exceptCol != null) { 3966 String exceptName = exceptCol.getColumnNameOnly(); 3967 if (exceptName != null && !exceptName.isEmpty()) { 3968 exceptColumns.add(exceptName.toUpperCase()); 3969 } 3970 } 3971 } 3972 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3973 logInfo("expandSingleStarColumn: Found " + exceptColumns.size() + 3974 " EXCEPT columns: " + exceptColumns); 3975 } 3976 } 3977 } 3978 3979 int count = 0; 3980 boolean isQualified = colStr.contains(".") && !colStr.equals("*"); 3981 3982 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3983 logInfo("expandSingleStarColumn: " + colStr + " isQualified=" + isQualified); 3984 } 3985 3986 if (isQualified) { 3987 // Qualified star (e.g., "src.*") - find the 
specific table/namespace 3988 String tablePrefix = colStr.substring(0, colStr.lastIndexOf('.')); 3989 count += expandQualifiedStar(starColumn, select, tablePrefix, attrList, exceptColumns); 3990 } else { 3991 // Unqualified star (*) - expand from all tables in FROM clause 3992 count += expandUnqualifiedStar(starColumn, select, attrList, exceptColumns); 3993 } 3994 3995 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3996 logInfo("expandSingleStarColumn: " + colStr + " expanded to " + count + " columns"); 3997 } 3998 3999 return count; 4000 } 4001 4002 /** 4003 * Expand a qualified star column (e.g., "src.*") using namespace inferred columns. 4004 * 4005 * @param starColumn The star column TObjectName 4006 * @param select The containing SELECT statement 4007 * @param tablePrefix The table prefix (e.g., "src" from "src.*") 4008 * @param attrList The list to add expanded attributes to 4009 * @param exceptColumns Column names to exclude (from EXCEPT clause), uppercase 4010 */ 4011 private int expandQualifiedStar(TObjectName starColumn, TSelectSqlStatement select, 4012 String tablePrefix, ArrayList<TAttributeNode> attrList, 4013 Set<String> exceptColumns) { 4014 int count = 0; 4015 4016 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4017 logInfo("expandQualifiedStar: tablePrefix=" + tablePrefix + 4018 ", exceptColumns=" + (exceptColumns != null ? exceptColumns : "none")); 4019 } 4020 4021 // Find the source table by alias or name 4022 TTable sourceTable = findTableByPrefixInSelect(select, tablePrefix); 4023 if (sourceTable == null) { 4024 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4025 logInfo("expandQualifiedStar: No source table found for " + tablePrefix); 4026 } 4027 // Fall back to just adding the qualified star attribute 4028 TAttributeNode.addNodeToList( 4029 new TAttributeNode(tablePrefix + ".*", null), 4030 attrList 4031 ); 4032 return 0; 4033 } 4034 4035 // Collect inferred columns from multiple sources: 4036 // 1. 
The table's own namespace (TableNamespace) 4037 // 2. If the SELECT is a CTE definition, the CTE's namespace 4038 // 3. If the SELECT is a subquery, the containing scope's namespace 4039 Set<String> allInferredCols = new HashSet<>(); 4040 4041 // Source 1: Get namespace for this table 4042 INamespace tableNamespace = scopeBuildResult != null 4043 ? scopeBuildResult.getNamespaceForTable(sourceTable) 4044 : null; 4045 4046 if (tableNamespace != null) { 4047 Set<String> inferredCols = tableNamespace.getInferredColumns(); 4048 if (inferredCols != null) { 4049 allInferredCols.addAll(inferredCols); 4050 } 4051 } 4052 4053 // Source 2: Check if this SELECT is part of a CTE definition 4054 // If so, the CTE namespace may have inferred columns from outer queries 4055 Set<String> cteInferredCols = getInferredColumnsFromContainingCTE(select); 4056 if (cteInferredCols != null) { 4057 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4058 logInfo("expandQualifiedStar: Adding " + cteInferredCols.size() + 4059 " CTE inferred columns for " + tablePrefix); 4060 } 4061 allInferredCols.addAll(cteInferredCols); 4062 } 4063 4064 // Source 3: Check the SELECT's output scope for inferred columns 4065 // IMPORTANT: For qualified star columns (like ta.*), only use scope-level inferred columns 4066 // if they actually exist in this table's namespace. Otherwise we'd incorrectly add columns 4067 // from other tables in the FROM clause to this star's expanded attributes. 4068 IScope selectScope = scopeBuildResult != null 4069 ? 
scopeBuildResult.getScopeForStatement(select) 4070 : null; 4071 if (selectScope != null) { 4072 Set<String> scopeInferredCols = getInferredColumnsFromScope(selectScope); 4073 if (scopeInferredCols != null && tableNamespace != null) { 4074 // Only add scope-level inferred columns that actually exist in this table's namespace 4075 // This prevents columns from other tables being incorrectly associated with this star 4076 Map<String, ColumnSource> columnSources = tableNamespace.getAllColumnSources(); 4077 Set<String> tableInferredCols = tableNamespace.getInferredColumns(); 4078 for (String scopeCol : scopeInferredCols) { 4079 // Check if this column can be resolved within this table's namespace 4080 boolean hasInNamespace = (columnSources != null && columnSources.containsKey(scopeCol)) || 4081 (tableInferredCols != null && tableInferredCols.contains(scopeCol)); 4082 if (hasInNamespace) { 4083 allInferredCols.add(scopeCol); 4084 } 4085 } 4086 } else if (scopeInferredCols != null && tableNamespace == null) { 4087 // No table namespace - add all scope columns (fallback for edge cases) 4088 allInferredCols.addAll(scopeInferredCols); 4089 } 4090 } 4091 4092 if (!allInferredCols.isEmpty()) { 4093 // Expand using inferred columns, filtering out EXCEPT columns 4094 for (String colName : allInferredCols) { 4095 // Skip columns in EXCEPT clause 4096 if (exceptColumns != null && !exceptColumns.isEmpty() && 4097 exceptColumns.contains(colName.toUpperCase())) { 4098 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4099 logInfo("expandQualifiedStar: Skipping EXCEPT column: " + colName); 4100 } 4101 continue; 4102 } 4103 String attrName = tablePrefix + "." 
+ colName; 4104 TAttributeNode.addNodeToList( 4105 new TAttributeNode(attrName, sourceTable), 4106 attrList 4107 ); 4108 count++; 4109 } 4110 } else if (tableNamespace != null) { 4111 // No inferred columns - try to get from namespace's column sources 4112 Map<String, ColumnSource> columnSources = tableNamespace.getAllColumnSources(); 4113 if (columnSources != null && !columnSources.isEmpty()) { 4114 for (String colName : columnSources.keySet()) { 4115 // Skip columns in EXCEPT clause 4116 if (exceptColumns != null && !exceptColumns.isEmpty() && 4117 exceptColumns.contains(colName.toUpperCase())) { 4118 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4119 logInfo("expandQualifiedStar: Skipping EXCEPT column from sources: " + colName); 4120 } 4121 continue; 4122 } 4123 String attrName = tablePrefix + "." + colName; 4124 TAttributeNode.addNodeToList( 4125 new TAttributeNode(attrName, sourceTable), 4126 attrList 4127 ); 4128 count++; 4129 } 4130 } 4131 } 4132 4133 // If no columns were added, add the star as fallback 4134 if (count == 0) { 4135 TAttributeNode.addNodeToList( 4136 new TAttributeNode(tablePrefix + ".*", sourceTable), 4137 attrList 4138 ); 4139 } 4140 4141 return count; 4142 } 4143 4144 /** 4145 * Get inferred columns from a CTE that contains the given SELECT statement. 4146 * Used for push-down: when outer queries reference columns from a CTE, 4147 * those columns are inferred in the CTE's namespace and should be used 4148 * to expand star columns in the CTE's SELECT. 
4149 */ 4150 private Set<String> getInferredColumnsFromContainingCTE(TSelectSqlStatement select) { 4151 if (select == null || scopeBuildResult == null || namespaceEnhancer == null) { 4152 return null; 4153 } 4154 4155 // Find the CTE that defines this SELECT 4156 Set<INamespace> starNamespaces = namespaceEnhancer.getStarNamespaces(); 4157 if (starNamespaces == null) { 4158 return null; 4159 } 4160 4161 for (INamespace ns : starNamespaces) { 4162 if (ns instanceof CTENamespace) { 4163 CTENamespace cteNs = (CTENamespace) ns; 4164 TSelectSqlStatement cteSelect = cteNs.getSelectStatement(); 4165 // Check both by reference and by start token position 4166 if (cteSelect == select || 4167 (cteSelect != null && select != null && 4168 cteSelect.getStartToken() != null && select.getStartToken() != null && 4169 cteSelect.getStartToken().posinlist == select.getStartToken().posinlist)) { 4170 Set<String> inferredCols = cteNs.getInferredColumns(); 4171 if (inferredCols != null && !inferredCols.isEmpty()) { 4172 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4173 logInfo("getInferredColumnsFromContainingCTE: Found CTE " + cteNs.getDisplayName() + 4174 " with " + inferredCols.size() + " inferred columns"); 4175 } 4176 return inferredCols; 4177 } 4178 } 4179 } else if (ns instanceof SubqueryNamespace) { 4180 SubqueryNamespace subNs = (SubqueryNamespace) ns; 4181 TSelectSqlStatement subSelect = subNs.getSelectStatement(); 4182 if (subSelect == select || 4183 (subSelect != null && select != null && 4184 subSelect.getStartToken() != null && select.getStartToken() != null && 4185 subSelect.getStartToken().posinlist == select.getStartToken().posinlist)) { 4186 Set<String> inferredCols = subNs.getInferredColumns(); 4187 if (inferredCols != null && !inferredCols.isEmpty()) { 4188 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4189 logInfo("getInferredColumnsFromContainingCTE: Found Subquery with " + 4190 inferredCols.size() + " inferred columns"); 4191 } 4192 return inferredCols; 4193 } 
4194 } 4195 } 4196 } 4197 4198 return null; 4199 } 4200 4201 /** 4202 * Get inferred columns from namespaces in a scope's FROM clause. 4203 */ 4204 private Set<String> getInferredColumnsFromScope(IScope scope) { 4205 if (scope == null) { 4206 return null; 4207 } 4208 4209 Set<String> result = new HashSet<>(); 4210 4211 // Check all namespaces in the scope's children 4212 for (gudusoft.gsqlparser.resolver2.model.ScopeChild child : scope.getChildren()) { 4213 INamespace ns = child.getNamespace(); 4214 if (ns != null) { 4215 Set<String> inferredCols = ns.getInferredColumns(); 4216 if (inferredCols != null) { 4217 result.addAll(inferredCols); 4218 } 4219 } 4220 } 4221 4222 return result.isEmpty() ? null : result; 4223 } 4224 4225 /** 4226 * Expand an unqualified star column (*) using all tables in FROM clause. 4227 * 4228 * @param starColumn The star column TObjectName 4229 * @param select The containing SELECT statement 4230 * @param attrList The list to add expanded attributes to 4231 * @param exceptColumns Column names to exclude (from EXCEPT clause), uppercase 4232 */ 4233 private int expandUnqualifiedStar(TObjectName starColumn, TSelectSqlStatement select, 4234 ArrayList<TAttributeNode> attrList, Set<String> exceptColumns) { 4235 int count = 0; 4236 4237 if (select.tables == null) return 0; 4238 4239 for (int i = 0; i < select.tables.size(); i++) { 4240 TTable table = select.tables.getTable(i); 4241 if (table == null) continue; 4242 4243 // Skip certain table types 4244 if (table.getTableType() == ETableSource.join) continue; 4245 4246 String tablePrefix = table.getAliasName(); 4247 if (tablePrefix == null || tablePrefix.isEmpty()) { 4248 tablePrefix = table.getName(); 4249 } 4250 if (tablePrefix == null) continue; 4251 4252 // Get namespace for this table 4253 INamespace namespace = scopeBuildResult != null 4254 ? 
scopeBuildResult.getNamespaceForTable(table) 4255 : null; 4256 4257 if (namespace != null) { 4258 Set<String> inferredCols = namespace.getInferredColumns(); 4259 4260 if (inferredCols != null && !inferredCols.isEmpty()) { 4261 for (String colName : inferredCols) { 4262 // Skip columns in EXCEPT clause 4263 if (exceptColumns != null && !exceptColumns.isEmpty() && 4264 exceptColumns.contains(colName.toUpperCase())) { 4265 continue; 4266 } 4267 String attrName = tablePrefix + "." + colName; 4268 TAttributeNode.addNodeToList( 4269 new TAttributeNode(attrName, table), 4270 attrList 4271 ); 4272 count++; 4273 } 4274 } else { 4275 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 4276 if (columnSources != null && !columnSources.isEmpty()) { 4277 for (String colName : columnSources.keySet()) { 4278 // Skip columns in EXCEPT clause 4279 if (exceptColumns != null && !exceptColumns.isEmpty() && 4280 exceptColumns.contains(colName.toUpperCase())) { 4281 continue; 4282 } 4283 String attrName = tablePrefix + "." + colName; 4284 TAttributeNode.addNodeToList( 4285 new TAttributeNode(attrName, table), 4286 attrList 4287 ); 4288 count++; 4289 } 4290 } 4291 } 4292 } 4293 4294 // If no columns for this table, add the star as fallback 4295 if (count == 0 || (namespace != null && namespace.getInferredColumns().isEmpty() 4296 && namespace.getAllColumnSources().isEmpty())) { 4297 TAttributeNode.addNodeToList( 4298 new TAttributeNode(tablePrefix + ".*", table), 4299 attrList 4300 ); 4301 } 4302 } 4303 4304 return count; 4305 } 4306 4307 /** 4308 * Find a table by its prefix (alias or name) in a SELECT statement. 
4309 */ 4310 private TTable findTableByPrefixInSelect(TSelectSqlStatement select, String prefix) { 4311 if (select == null || select.tables == null || prefix == null) return null; 4312 4313 // Normalize prefix (remove backticks, quotes, schema prefix for comparison) 4314 String normalizedPrefix = normalizeTablePrefix(prefix); 4315 4316 for (int i = 0; i < select.tables.size(); i++) { 4317 TTable table = select.tables.getTable(i); 4318 if (table == null) continue; 4319 4320 // Check alias first 4321 String alias = table.getAliasName(); 4322 if (alias != null && normalizeTablePrefix(alias).equalsIgnoreCase(normalizedPrefix)) { 4323 return table; 4324 } 4325 4326 // Check table name 4327 String name = table.getName(); 4328 if (name != null && normalizeTablePrefix(name).equalsIgnoreCase(normalizedPrefix)) { 4329 return table; 4330 } 4331 4332 // Check full table name (with schema) 4333 if (table.getTableName() != null) { 4334 String fullName = table.getTableName().toString(); 4335 if (fullName != null && normalizeTablePrefix(fullName).equalsIgnoreCase(normalizedPrefix)) { 4336 return table; 4337 } 4338 } 4339 } 4340 4341 return null; 4342 } 4343 4344 /** 4345 * Normalize table prefix for comparison (remove quotes, backticks). 
4346 */ 4347 private String normalizeTablePrefix(String prefix) { 4348 if (prefix == null) return ""; 4349 String result = prefix.trim(); 4350 // Remove backticks 4351 if (result.startsWith("`") && result.endsWith("`")) { 4352 result = result.substring(1, result.length() - 1); 4353 } 4354 // Remove double quotes 4355 if (result.startsWith("\"") && result.endsWith("\"")) { 4356 result = result.substring(1, result.length() - 1); 4357 } 4358 // Remove brackets 4359 if (result.startsWith("[") && result.endsWith("]")) { 4360 result = result.substring(1, result.length() - 1); 4361 } 4362 return result; 4363 } 4364 4365 /** 4366 * Get resolution statistics 4367 */ 4368 public ResolutionStatistics getStatistics() { 4369 return resolutionContext.getStatistics(); 4370 } 4371 4372 /** 4373 * Get the resolution context (for advanced queries) 4374 */ 4375 public ResolutionContext getContext() { 4376 return resolutionContext; 4377 } 4378 4379 /** 4380 * Get the global scope 4381 */ 4382 public GlobalScope getGlobalScope() { 4383 return globalScope; 4384 } 4385 4386 /** 4387 * Get the configuration 4388 */ 4389 public TSQLResolverConfig getConfig() { 4390 return config; 4391 } 4392 4393 /** 4394 * Get the pass history (for iterative resolution analysis) 4395 * 4396 * @return list of all resolution passes (empty if non-iterative or not yet resolved) 4397 */ 4398 public List<ResolutionPass> getPassHistory() { 4399 return new ArrayList<>(passHistory); 4400 } 4401 4402 /** 4403 * Get the convergence detector (for iterative resolution analysis) 4404 * 4405 * @return convergence detector (null if iterative resolution is disabled) 4406 */ 4407 public ConvergenceDetector getConvergenceDetector() { 4408 return convergenceDetector; 4409 } 4410 4411 /** 4412 * Get the scope build result (for testing and analysis) 4413 * 4414 * @return scope build result from ScopeBuilder (null if not yet resolved) 4415 */ 4416 public ScopeBuildResult getScopeBuildResult() { 4417 return scopeBuildResult; 
4418 } 4419 4420 /** 4421 * Get the resolution result access interface. 4422 * This provides a clean, statement-centric API for accessing resolution results. 4423 * 4424 * <p>Usage example:</p> 4425 * <pre> 4426 * TSQLResolver2 resolver = new TSQLResolver2(null, parser.sqlstatements); 4427 * resolver.resolve(); 4428 * 4429 * IResolutionResult result = resolver.getResult(); 4430 * 4431 * for (TCustomSqlStatement stmt : parser.sqlstatements) { 4432 * for (TTable table : result.getTables(stmt)) { 4433 * System.out.println("Table: " + table.getFullName()); 4434 * for (TObjectName col : result.getColumnsForTable(stmt, table)) { 4435 * System.out.println(" Column: " + col.getColumnNameOnly()); 4436 * } 4437 * } 4438 * } 4439 * </pre> 4440 * 4441 * @return resolution result access interface 4442 * @throws IllegalStateException if resolve() has not been called 4443 */ 4444 public IResolutionResult getResult() { 4445 if (scopeBuildResult == null) { 4446 throw new IllegalStateException( 4447 "Must call resolve() before getResult()"); 4448 } 4449 return new ResolutionResultImpl(scopeBuildResult, sqlStatements); 4450 } 4451 4452 // ===== Star Column Reverse Inference Support (Principle 3) ===== 4453 4454 /** 4455 * Star Column push-down context for reverse inference. 4456 * Tracks which columns should be added to which Namespaces based on 4457 * outer layer references. 4458 */ 4459 private static class StarPushDownContext { 4460 /** Namespace -> (ColumnName -> Confidence) */ 4461 private final Map<INamespace, Map<String, Double>> pushDownMap = new HashMap<>(); 4462 4463 /** 4464 * Record that a column should be added to a namespace. 4465 * If the same column is pushed multiple times, keep the highest confidence. 
4466 */ 4467 public void pushColumn(INamespace namespace, String columnName, double confidence) { 4468 Map<String, Double> columns = pushDownMap.computeIfAbsent(namespace, k -> new HashMap<>()); 4469 columns.put(columnName, Math.max(confidence, columns.getOrDefault(columnName, 0.0))); 4470 } 4471 4472 /** 4473 * Get all columns that should be pushed to each namespace. 4474 */ 4475 public Map<INamespace, java.util.Set<String>> getAllPushDownColumns() { 4476 Map<INamespace, java.util.Set<String>> result = new HashMap<>(); 4477 for (Map.Entry<INamespace, Map<String, Double>> entry : pushDownMap.entrySet()) { 4478 result.put(entry.getKey(), entry.getValue().keySet()); 4479 } 4480 return result; 4481 } 4482 4483 /** 4484 * Get the confidence score for a specific column in a namespace. 4485 */ 4486 public double getConfidence(INamespace namespace, String columnName) { 4487 return pushDownMap.getOrDefault(namespace, java.util.Collections.emptyMap()) 4488 .getOrDefault(columnName, 0.0); 4489 } 4490 4491 /** 4492 * Get the total number of columns to be pushed down across all namespaces. 4493 */ 4494 public int getTotalPushedColumns() { 4495 return pushDownMap.values().stream() 4496 .mapToInt(Map::size) 4497 .sum(); 4498 } 4499 } 4500 4501 /** 4502 * Represents a star column source (CTE or subquery with SELECT *). 4503 * Used for reverse inference to track which columns are required from the star. 
4504 */ 4505 private static class StarColumnSource { 4506 private final String name; // CTE name or subquery alias 4507 private final INamespace namespace; // The namespace for this source 4508 private final INamespace underlyingTableNamespace; // Namespace of the table behind SELECT * 4509 private final java.util.Set<String> requiredColumns = new java.util.HashSet<>(); 4510 4511 public StarColumnSource(String name, INamespace namespace, INamespace underlyingTableNamespace) { 4512 this.name = name; 4513 this.namespace = namespace; 4514 this.underlyingTableNamespace = underlyingTableNamespace; 4515 } 4516 4517 public String getName() { 4518 return name; 4519 } 4520 4521 public INamespace getNamespace() { 4522 return namespace; 4523 } 4524 4525 public void addRequiredColumn(String columnName) { 4526 requiredColumns.add(columnName); 4527 } 4528 4529 public java.util.Set<String> getRequiredColumns() { 4530 return requiredColumns; 4531 } 4532 4533 public boolean hasUnderlyingTable() { 4534 return underlyingTableNamespace != null; 4535 } 4536 4537 public INamespace getUnderlyingTableNamespace() { 4538 return underlyingTableNamespace; 4539 } 4540 4541 @Override 4542 public String toString() { 4543 return String.format("StarColumnSource[%s, required=%d]", name, requiredColumns.size()); 4544 } 4545 } 4546 4547 /** 4548 * Collect all star column sources (CTEs and subqueries with SELECT *). 4549 * Traverses the scope tree to find CTENamespace and SubqueryNamespace 4550 * that use SELECT * in their subqueries. 
4551 */ 4552 private List<StarColumnSource> collectAllStarColumnSources() { 4553 List<StarColumnSource> sources = new ArrayList<>(); 4554 4555 // Traverse global scope tree 4556 if (globalScope != null) { 4557 collectStarSourcesFromScope(globalScope, sources); 4558 } 4559 4560 // Also traverse UPDATE scopes (for Teradata UPDATE...FROM syntax) 4561 if (scopeBuilder != null) { 4562 for (UpdateScope updateScope : scopeBuilder.getUpdateScopeMap().values()) { 4563 collectStarSourcesFromScope(updateScope, sources); 4564 } 4565 for (DeleteScope deleteScope : scopeBuilder.getDeleteScopeMap().values()) { 4566 collectStarSourcesFromScope(deleteScope, sources); 4567 } 4568 } 4569 4570 logDebug("Collected " + sources.size() + " star column sources"); 4571 return sources; 4572 } 4573 4574 /** 4575 * Recursively collect star column sources from a scope and its children. 4576 */ 4577 private void collectStarSourcesFromScope(IScope scope, List<StarColumnSource> sources) { 4578 // Check all child namespaces in this scope 4579 for (gudusoft.gsqlparser.resolver2.model.ScopeChild child : scope.getChildren()) { 4580 INamespace namespace = child.getNamespace(); 4581 4582 // Use the new interface method to check for star columns 4583 if (namespace.hasStarColumn()) { 4584 TSelectSqlStatement selectStmt = namespace.getSelectStatement(); 4585 INamespace underlyingNs = selectStmt != null ? 
getFirstTableNamespace(selectStmt) : null; 4586 4587 StarColumnSource starSource = new StarColumnSource( 4588 namespace.getDisplayName(), 4589 namespace, 4590 underlyingNs 4591 ); 4592 sources.add(starSource); 4593 4594 logDebug("Found star source: " + namespace.getDisplayName()); 4595 } 4596 } 4597 4598 // Recursively traverse child scopes based on scope type 4599 if (scope instanceof SelectScope) { 4600 SelectScope selectScope = (SelectScope) scope; 4601 if (selectScope.getFromScope() != null) { 4602 collectStarSourcesFromScope(selectScope.getFromScope(), sources); 4603 } 4604 } else if (scope instanceof UpdateScope) { 4605 UpdateScope updateScope = (UpdateScope) scope; 4606 if (updateScope.getFromScope() != null) { 4607 collectStarSourcesFromScope(updateScope.getFromScope(), sources); 4608 } 4609 } else if (scope instanceof DeleteScope) { 4610 DeleteScope deleteScope = (DeleteScope) scope; 4611 if (deleteScope.getFromScope() != null) { 4612 collectStarSourcesFromScope(deleteScope.getFromScope(), sources); 4613 } 4614 } 4615 } 4616 4617 4618 /** 4619 * Get the first table namespace from a SELECT statement's FROM clause. 4620 * Returns the DynamicStarSource if available. 4621 */ 4622 private INamespace getFirstTableNamespace(TSelectSqlStatement select) { 4623 if (select == null || select.tables == null || select.tables.size() == 0) { 4624 return null; 4625 } 4626 4627 // Get first table 4628 TTable firstTable = select.tables.getTable(0); 4629 String tableName = firstTable.getAliasName() != null 4630 ? firstTable.getAliasName() 4631 : firstTable.getName(); 4632 4633 // Search for corresponding namespace in all dynamic namespaces 4634 List<INamespace> dynamicNamespaces = getAllDynamicNamespaces(); 4635 for (INamespace ns : dynamicNamespaces) { 4636 if (ns.getDisplayName().equals(tableName)) { 4637 return ns; 4638 } 4639 } 4640 4641 return null; 4642 } 4643 4644 /** 4645 * Collect all outer references to a star column source. 
4646 * Searches through allColumnReferences for columns that reference this star source. 4647 */ 4648 private List<TObjectName> collectOuterReferencesToSource(StarColumnSource starSource) { 4649 List<TObjectName> references = new ArrayList<>(); 4650 4651 if (starSource == null || starSource.getName() == null) { 4652 return references; 4653 } 4654 4655 String sourceName = starSource.getName(); 4656 4657 // Search through all collected column references 4658 for (TObjectName objName : allColumnReferences) { 4659 if (objName == null) { 4660 continue; 4661 } 4662 4663 // Check if this column reference is from the star source 4664 // E.g., for CTE named "my_cte", check if objName is like "my_cte.col1" 4665 String tableQualifier = getTableQualifier(objName); 4666 4667 if (tableQualifier != null && tableQualifier.equalsIgnoreCase(sourceName)) { 4668 references.add(objName); 4669 logDebug("Found outer reference: " + objName + " -> " + sourceName); 4670 } 4671 } 4672 4673 logDebug("Collected " + references.size() + " outer references for: " + sourceName); 4674 return references; 4675 } 4676 4677 /** 4678 * Get the table qualifier from a TObjectName. 
4679 * E.g., for "schema.table.column", returns "table" 4680 * E.g., for "table.column", returns "table" 4681 * E.g., for "column", returns null 4682 */ 4683 private String getTableQualifier(TObjectName objName) { 4684 if (objName == null) { 4685 return null; 4686 } 4687 4688 // TObjectName has parts like: [schema, table, column] 4689 // or [table, column] 4690 // or [column] 4691 4692 // If there are 3 or more parts, the second-to-last is the table 4693 // If there are 2 parts, the first is the table 4694 // If there is 1 part, there's no table qualifier 4695 4696 String fullName = objName.toString(); 4697 String[] parts = fullName.split("\\."); 4698 4699 if (parts.length >= 3) { 4700 // schema.table.column -> return table 4701 return parts[parts.length - 2]; 4702 } else if (parts.length == 2) { 4703 // table.column -> return table 4704 return parts[0]; 4705 } else { 4706 // Just column name, no qualifier 4707 return null; 4708 } 4709 } 4710 4711 /** 4712 * Get all DynamicStarSource namespaces from the scope tree. 4713 * This is used to apply inference results to namespaces that need enhancement. 4714 */ 4715 private List<INamespace> getAllDynamicNamespaces() { 4716 List<INamespace> result = new ArrayList<>(); 4717 4718 // Collect from global scope tree 4719 if (globalScope != null) { 4720 collectDynamicNamespacesFromScope(globalScope, result); 4721 } 4722 4723 return result; 4724 } 4725 4726 /** 4727 * Recursively collect DynamicStarSource namespaces from a scope and its children. 
*
     * <p>Only a {@link SelectScope} is descended into, through its FROM scope. The other
     * scope types contribute just their direct child namespaces.</p>
     *
     * @param scope the scope to inspect; {@code null} is ignored
     * @param result the list that receives every DynamicStarSource namespace found
     */
    private void collectDynamicNamespacesFromScope(IScope scope, List<INamespace> result) {
        if (scope == null) {
            return;
        }

        // Get all child namespaces from this scope
        for (gudusoft.gsqlparser.resolver2.model.ScopeChild child : scope.getChildren()) {
            INamespace namespace = child.getNamespace();
            if (namespace instanceof gudusoft.gsqlparser.resolver2.namespace.DynamicStarSource) {
                result.add(namespace);
                logDebug("Found DynamicStarSource: " + namespace.getDisplayName());
            }
        }

        // CTE, FROM, GROUP BY, HAVING and ORDER BY scopes are not descended into, and the
        // scope trees of nested subqueries are not reachable from here (they are handled
        // during scope building). Only the FROM scope of a SELECT is followed.
        if (scope instanceof SelectScope) {
            SelectScope selectScope = (SelectScope) scope;
            if (selectScope.getFromScope() != null) {
                collectDynamicNamespacesFromScope(selectScope.getFromScope(), result);
            }
        }
    }

    // ===== Logging helpers =====

    /** Log {@code message} at INFO level, prefixed with the resolver tag. */
    private void logInfo(String message) {
        TBaseType.log("[TSQLResolver2] " + message, TLog.INFO);
    }

    /** Log {@code message} at DEBUG level, prefixed with the resolver tag. */
    private void logDebug(String message) {
        TBaseType.log("[TSQLResolver2] " + message, TLog.DEBUG);
    }

    /** Log {@code message} at ERROR level, prefixed with the resolver tag. */
    private void logError(String message) {
        TBaseType.log("[TSQLResolver2] " + message, TLog.ERROR);
    }
}