001package gudusoft.gsqlparser.resolver2; 002 003import gudusoft.gsqlparser.TBaseType; 004import gudusoft.gsqlparser.TCustomSqlStatement; 005import gudusoft.gsqlparser.IRelation; 006import gudusoft.gsqlparser.TLog; 007import gudusoft.gsqlparser.TSourceToken; 008import gudusoft.gsqlparser.TStatementList; 009import gudusoft.gsqlparser.ETableSource; 010import gudusoft.gsqlparser.EDbVendor; 011import gudusoft.gsqlparser.EDbObjectType; 012import gudusoft.gsqlparser.EErrorType; 013import gudusoft.gsqlparser.ESqlClause; 014import gudusoft.gsqlparser.ESqlStatementType; 015import gudusoft.gsqlparser.TSyntaxError; 016import gudusoft.gsqlparser.stmt.dax.TDaxStmt; 017import gudusoft.gsqlparser.stmt.TAlterTableStatement; 018import gudusoft.gsqlparser.stmt.TCreateTableSqlStatement; 019import gudusoft.gsqlparser.stmt.TInsertSqlStatement; 020import gudusoft.gsqlparser.stmt.TUpdateSqlStatement; 021import gudusoft.gsqlparser.stmt.TDeleteSqlStatement; 022import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 023import gudusoft.gsqlparser.compiler.TContext; 024import gudusoft.gsqlparser.nodes.TObjectName; 025import gudusoft.gsqlparser.nodes.TObjectNameList; 026import gudusoft.gsqlparser.nodes.TTable; 027import gudusoft.gsqlparser.nodes.TJoinExpr; 028import gudusoft.gsqlparser.nodes.TParseTreeNode; 029import gudusoft.gsqlparser.nodes.TParseTreeVisitor; 030import gudusoft.gsqlparser.nodes.TResultColumn; 031import gudusoft.gsqlparser.nodes.TResultColumnList; 032import gudusoft.gsqlparser.nodes.TQualifyClause; 033import gudusoft.gsqlparser.nodes.TExpression; 034import gudusoft.gsqlparser.EExpressionType; 035import gudusoft.gsqlparser.resolver2.model.ColumnSource; 036import gudusoft.gsqlparser.resolver2.model.FromScopeIndex; 037import gudusoft.gsqlparser.resolver2.model.ResolutionContext; 038import gudusoft.gsqlparser.resolver2.model.ResolutionResult; 039import gudusoft.gsqlparser.resolver2.model.ResolutionStatistics; 040import gudusoft.gsqlparser.resolver2.ResolutionStatus; 041import 
gudusoft.gsqlparser.resolver2.result.IResolutionResult; 042import gudusoft.gsqlparser.resolver2.result.ResolutionResultImpl; 043import gudusoft.gsqlparser.resolver2.scope.FromScope; 044import gudusoft.gsqlparser.resolver2.scope.GlobalScope; 045import gudusoft.gsqlparser.resolver2.scope.IScope; 046import gudusoft.gsqlparser.resolver2.scope.SelectScope; 047import gudusoft.gsqlparser.resolver2.scope.CTEScope; 048import gudusoft.gsqlparser.resolver2.scope.GroupByScope; 049import gudusoft.gsqlparser.resolver2.scope.HavingScope; 050import gudusoft.gsqlparser.resolver2.scope.OrderByScope; 051import gudusoft.gsqlparser.resolver2.scope.UpdateScope; 052import gudusoft.gsqlparser.resolver2.scope.DeleteScope; 053import gudusoft.gsqlparser.resolver2.namespace.INamespace; 054import gudusoft.gsqlparser.resolver2.namespace.TableNamespace; 055import gudusoft.gsqlparser.resolver2.namespace.SubqueryNamespace; 056import gudusoft.gsqlparser.resolver2.namespace.CTENamespace; 057import gudusoft.gsqlparser.nodes.TCTE; 058import gudusoft.gsqlparser.nodes.TCTEList; 059import gudusoft.gsqlparser.nodes.TUnnestClause; 060import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 061import gudusoft.gsqlparser.resolver2.iterative.ConvergenceDetector; 062import gudusoft.gsqlparser.resolver2.iterative.ResolutionPass; 063import gudusoft.gsqlparser.resolver2.enhancement.NamespaceEnhancer; 064import gudusoft.gsqlparser.resolver2.enhancement.EnhancementResult; 065import gudusoft.gsqlparser.resolver2.enhancement.CollectedColumnRef; 066import gudusoft.gsqlparser.resolver2.metadata.BatchMetadataCollector; 067import gudusoft.gsqlparser.resolver2.context.DatabaseContextTracker; 068import gudusoft.gsqlparser.resolver2.namespace.CTENamespace; 069import gudusoft.gsqlparser.sqlenv.TSQLEnv; 070import gudusoft.gsqlparser.TAttributeNode; 071import gudusoft.gsqlparser.resolver2.binding.BindingDiagnostic; 072import gudusoft.gsqlparser.resolver2.binding.BindingDiagnosticPostPass; 073import 
gudusoft.gsqlparser.resolver2.binding.BindingResult; 074 075import java.util.ArrayDeque; 076import java.util.ArrayList; 077import java.util.Deque; 078import java.util.HashMap; 079import java.util.HashSet; 080import java.util.IdentityHashMap; 081import java.util.List; 082import java.util.Map; 083import java.util.Set; 084 085// ScopeBuilder for visitor-based scope construction 086import gudusoft.gsqlparser.resolver2.ScopeBuilder; 087import gudusoft.gsqlparser.resolver2.ScopeBuildResult; 088 089/** 090 * New SQL Resolver - Phase 2 Enhanced Framework 091 * 092 * This is the main entry point for the new resolution architecture. 093 * Provides improved column-to-table resolution with: 094 * - Clear scope-based name resolution 095 * - Full candidate collection for ambiguous cases 096 * - Confidence-scored inference 097 * - Better tracing and debugging 098 * 099 * Usage: 100 * <pre> 101 * TSQLResolver2 resolver = new TSQLResolver2(context, statements); 102 * boolean success = resolver.resolve(); 103 * ResolutionStatistics stats = resolver.getStatistics(); 104 * </pre> 105 * 106 * Phase 1 capabilities: 107 * - Basic SELECT statement resolution 108 * - Table and subquery namespaces 109 * - Qualified and unqualified column references 110 * - FROM clause scope management 111 * 112 * Phase 2 capabilities: 113 * - JOIN scope handling with nullable semantics 114 * - CTE (WITH clause) resolution 115 * - Iterative resolution framework (auto-converges after first pass if no iteration needed) 116 * 117 * Future phases will add: 118 * - Evidence-based inference 119 * - Star column expansion 120 */ 121public class TSQLResolver2 { 122 123 private final TContext globalContext; 124 private final TStatementList sqlStatements; 125 private final TSQLResolverConfig config; 126 private final ResolutionContext resolutionContext; 127 private final NameResolver nameResolver; 128 129 /** Global scope (root of scope tree) */ 130 private GlobalScope globalScope; 131 132 /** Convergence detector for 
iterative resolution */ 133 private ConvergenceDetector convergenceDetector; 134 135 /** History of all resolution passes */ 136 private final List<ResolutionPass> passHistory; 137 138 /** 139 * Scope cache for iterative resolution. 140 * Maps statements to their scope trees to avoid rebuilding scopes on each pass. 141 * Key: TCustomSqlStatement, Value: SelectScope (or other scope type) 142 */ 143 private final java.util.Map<Object, IScope> statementScopeCache; 144 145 /** 146 * Column-to-Scope mapping for iterative resolution (Principle 1: Scope完全复用). 147 * Built once in Pass 1, reused in Pass 2+ to avoid rebuilding scopes. 148 * Maps each TObjectName (column reference) to the IScope where it should be resolved. 149 */ 150 private final java.util.Map<TObjectName, IScope> columnToScopeMap; 151 152 /** 153 * FromScope index cache for O(1) table/namespace lookups (Performance Optimization B). 154 * Maps FromScope instances to their pre-built indexes. 155 * Built lazily on first access, cleared at the start of each resolve() call. 156 * Uses IdentityHashMap because we need object identity, not equals(). 157 */ 158 private final Map<IScope, FromScopeIndex> fromScopeIndexCache; 159 160 /** 161 * Cache for Teradata NAMED alias lookup. 162 * Maps SELECT statements to their alias index (alias name -> TResultColumn). 163 * Uses IdentityHashMap because we need object identity, not equals(). 164 * Optimization C: Reduces O(cols * select_items) to O(cols) for Teradata. 165 */ 166 private final Map<TSelectSqlStatement, Map<String, TResultColumn>> teradataNamedAliasCache; 167 168 /** 169 * All column references collected during Pass 1 (Principle 1: Scope完全复用). 170 * Used in Pass 2+ to re-resolve names without rebuilding the scope tree. 171 */ 172 private final List<TObjectName> allColumnReferences; 173 174 /** 175 * ScopeBuilder for visitor-based scope construction. 176 * Replaces manual scope building with proper nested scope handling. 
177 */ 178 private final ScopeBuilder scopeBuilder; 179 180 /** 181 * Result from ScopeBuilder containing the complete scope tree. 182 * This is populated in Pass 1 and reused in Pass 2+. 183 */ 184 private ScopeBuildResult scopeBuildResult; 185 186 /** 187 * Slice S5 — populated by {@link BindingDiagnosticPostPass} after the 188 * iterative resolver loop converges, when at least one binding flag is 189 * on. Stays at {@link BindingResult#empty()} when binding is disabled 190 * or {@code resolve()} has not yet been called (plan §5.6, §12). 191 */ 192 private BindingResult bindingResult = BindingResult.empty(); 193 194 /** 195 * NamespaceEnhancer for explicit column collection and enhancement. 196 * Handles the explicit namespace enhancement phase between resolution passes. 197 * Columns are collected during resolution and added to namespaces explicitly. 198 */ 199 private NamespaceEnhancer namespaceEnhancer; 200 201 /** 202 * Create resolver with default configuration 203 */ 204 public TSQLResolver2(TContext context, TStatementList statements) { 205 this(context, statements, TSQLResolverConfig.createDefault()); 206 } 207 208 /** 209 * Create resolver with custom configuration 210 */ 211 public TSQLResolver2(TContext context, TStatementList statements, TSQLResolverConfig config) { 212 this.globalContext = context; 213 this.sqlStatements = statements; 214 this.config = config; 215 this.resolutionContext = new ResolutionContext(); 216 this.nameResolver = new NameResolver(config, resolutionContext); 217 this.passHistory = new ArrayList<>(); 218 this.statementScopeCache = new java.util.HashMap<>(); 219 this.columnToScopeMap = new java.util.HashMap<>(); 220 this.fromScopeIndexCache = new IdentityHashMap<>(); 221 this.teradataNamedAliasCache = new IdentityHashMap<>(); 222 this.allColumnReferences = new ArrayList<>(); 223 224 // Initialize ScopeBuilder for visitor-based scope construction 225 this.scopeBuilder = new ScopeBuilder(context, config.getNameMatcher()); 226 // 
Pass guessColumnStrategy from config for namespace isolation (prevents test side effects) 227 if (config.hasCustomGuessColumnStrategy()) { 228 this.scopeBuilder.setGuessColumnStrategy(config.getGuessColumnStrategy()); 229 } 230 231 // If context is null, try to get TSQLEnv from statements 232 // This allows TSQLEnv to flow from parser.setSqlEnv() through statements 233 if (statements != null && statements.size() > 0) { 234 try { 235 TCustomSqlStatement firstStmt = statements.get(0); 236 if (firstStmt != null && firstStmt.getGlobalScope() != null && 237 firstStmt.getGlobalScope().getSqlEnv() != null) { 238 this.scopeBuilder.setSqlEnv(firstStmt.getGlobalScope().getSqlEnv()); 239 } 240 } catch (Exception e) { 241 // Silently ignore - SQLEnv is optional enhancement 242 } 243 } 244 245 // Initialize convergence detector for iterative resolution 246 this.convergenceDetector = new ConvergenceDetector( 247 config.getMaxIterations(), 248 config.getStablePassesForConvergence(), 249 config.getMinProgressRate() 250 ); 251 252 // Initialize namespace enhancer for explicit column collection 253 // Debug mode follows the global resolver log setting 254 this.namespaceEnhancer = new NamespaceEnhancer(TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE); 255 } 256 257 /** 258 * Set the TSQLEnv to use for table metadata lookup. 259 * This allows external callers to provide TSQLEnv if automatic detection fails. 260 * 261 * @param sqlEnv the SQL environment containing table metadata 262 */ 263 public void setSqlEnv(gudusoft.gsqlparser.sqlenv.TSQLEnv sqlEnv) { 264 if (scopeBuilder != null) { 265 scopeBuilder.setSqlEnv(sqlEnv); 266 } 267 } 268 269 /** 270 * Get the TSQLEnv used for table metadata lookup. 271 * 272 * @return the SQL environment, or null if not set 273 */ 274 public gudusoft.gsqlparser.sqlenv.TSQLEnv getSqlEnv() { 275 return scopeBuilder != null ? scopeBuilder.getSqlEnv() : null; 276 } 277 278 /** 279 * Get the set of virtual trigger tables (deleted/inserted in SQL Server triggers). 
280 * These tables should be excluded from table output since their columns are 281 * resolved to the trigger's target table. 282 * 283 * @return Set of TTable objects that are virtual trigger tables 284 */ 285 public java.util.Set<gudusoft.gsqlparser.nodes.TTable> getVirtualTriggerTables() { 286 return scopeBuilder != null ? scopeBuilder.getVirtualTriggerTables() : java.util.Collections.emptySet(); 287 } 288 289 /** 290 * Get the SQL statements being resolved. 291 * 292 * @return the list of SQL statements 293 */ 294 public TStatementList getStatements() { 295 return sqlStatements; 296 } 297 298 // Performance timing fields (instance-level for single resolve() call) 299 private long timeScopeBuilder = 0; 300 private long timeNameResolution = 0; 301 private long timeEnhancement = 0; 302 private long timeLegacySync = 0; 303 private long timeOther = 0; 304 305 // Global accumulators for profiling across all resolve() calls 306 private static long globalTimeScopeBuilder = 0; 307 private static long globalTimeNameResolution = 0; 308 private static long globalTimeEnhancement = 0; 309 private static long globalTimeLegacySync = 0; 310 private static long globalTimeOther = 0; 311 private static int globalResolveCount = 0; 312 313 /** 314 * Reset global timing accumulators. 315 */ 316 public static void resetGlobalTimings() { 317 globalTimeScopeBuilder = 0; 318 globalTimeNameResolution = 0; 319 globalTimeEnhancement = 0; 320 globalTimeLegacySync = 0; 321 globalTimeOther = 0; 322 globalResolveCount = 0; 323 // Reset detailed legacy sync timings 324 globalTimeClearLinked = 0; 325 globalTimeFillAttributes = 0; 326 globalTimeSyncColumns = 0; 327 globalTimePopulateOrphans = 0; 328 globalTimeClearHints = 0; 329 } 330 331 /** 332 * Get global performance timing breakdown for profiling across all resolve() calls. 
333 * @return formatted timing information 334 */ 335 public static String getGlobalPerformanceTimings() { 336 long total = globalTimeScopeBuilder + globalTimeNameResolution + globalTimeEnhancement + globalTimeLegacySync + globalTimeOther; 337 return String.format( 338 "TSQLResolver2 Global Timings (across %d resolve() calls):\n" + 339 " ScopeBuilder: %d ms (%.1f%%)\n" + 340 " NameResolution: %d ms (%.1f%%)\n" + 341 " Enhancement: %d ms (%.1f%%)\n" + 342 " LegacySync: %d ms (%.1f%%)\n" + 343 " Other: %d ms (%.1f%%)\n" + 344 " Total: %d ms", 345 globalResolveCount, 346 globalTimeScopeBuilder, total > 0 ? 100.0 * globalTimeScopeBuilder / total : 0, 347 globalTimeNameResolution, total > 0 ? 100.0 * globalTimeNameResolution / total : 0, 348 globalTimeEnhancement, total > 0 ? 100.0 * globalTimeEnhancement / total : 0, 349 globalTimeLegacySync, total > 0 ? 100.0 * globalTimeLegacySync / total : 0, 350 globalTimeOther, total > 0 ? 100.0 * globalTimeOther / total : 0, 351 total); 352 } 353 354 /** 355 * Get performance timing breakdown for profiling. 356 * @return formatted timing information 357 */ 358 public String getPerformanceTimings() { 359 long total = timeScopeBuilder + timeNameResolution + timeEnhancement + timeLegacySync + timeOther; 360 return String.format( 361 "TSQLResolver2 Timings:\n" + 362 " ScopeBuilder: %d ms (%.1f%%)\n" + 363 " NameResolution: %d ms (%.1f%%)\n" + 364 " Enhancement: %d ms (%.1f%%)\n" + 365 " LegacySync: %d ms (%.1f%%)\n" + 366 " Other: %d ms (%.1f%%)\n" + 367 " Total: %d ms", 368 timeScopeBuilder, total > 0 ? 100.0 * timeScopeBuilder / total : 0, 369 timeNameResolution, total > 0 ? 100.0 * timeNameResolution / total : 0, 370 timeEnhancement, total > 0 ? 100.0 * timeEnhancement / total : 0, 371 timeLegacySync, total > 0 ? 100.0 * timeLegacySync / total : 0, 372 timeOther, total > 0 ? 
100.0 * timeOther / total : 0, 373 total); 374 } 375 376 /** 377 * Perform resolution on all SQL statements 378 */ 379 public boolean resolve() { 380 // Reset timing counters 381 timeScopeBuilder = 0; 382 timeNameResolution = 0; 383 timeEnhancement = 0; 384 timeLegacySync = 0; 385 timeOther = 0; 386 387 // S5: clear any prior binding result so flag-off resolves stay 388 // pinned to BindingResult.empty(). 389 bindingResult = BindingResult.empty(); 390 391 // Setup logging 392 TLog.clearLogs(); 393 if (!TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 394 TLog.disableLog(); 395 } else { 396 TLog.enableAllLevelLog(); 397 } 398 399 try { 400 logInfo("Starting TSQLResolver2.resolve()"); 401 402 long startTime = System.currentTimeMillis(); 403 404 // S3: enable per-TObjectName binding-trace recording when at least 405 // one binding flag is on. Allocation stays lazy — flag-off parses 406 // never instantiate the trace maps (plan §10.1 perf gate). 407 boolean bindingEnabled = config != null 408 && (config.isEmitBindingDiagnostics() 409 || config.isBindingIncludeSuccessfulReferences()); 410 if (bindingEnabled) { 411 resolutionContext.enableBindingTrace(); 412 } 413 414 // Delta 1: Collect metadata from DDL statements if no SQLEnv provided 415 if (getSqlEnv() == null) { 416 collectBatchMetadata(); 417 } 418 419 // Delta 4: Track database context from USE/SET statements 420 trackDatabaseContext(); 421 422 // Phase 1: Build global scope (once for all passes) 423 buildGlobalScope(); 424 425 timeOther += System.currentTimeMillis() - startTime; 426 427 // Phase 2: Perform iterative resolution 428 // (automatically completes after first pass if no second pass is needed) 429 boolean ok = performIterativeResolution(); 430 431 // S5: run the binding-diagnostic post-pass exactly once after the 432 // iterative loop converges, when at least one binding flag is on. 
433 // Plan §5.6.1: this is a diagnostic interpretation pass — it 434 // reads the final resolver state but never re-binds names. 435 if (ok && bindingEnabled) { 436 BindingDiagnosticPostPass postPass = 437 new BindingDiagnosticPostPass(this, config); 438 bindingResult = postPass.run(); 439 } 440 441 return ok; 442 443 } catch (Exception e) { 444 logError("Exception in TSQLResolver2.resolve(): " + e.getMessage()); 445 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 446 e.printStackTrace(); 447 } 448 return false; 449 } finally { 450 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 451 TBaseType.dumpLogs(false); 452 } 453 } 454 } 455 456 /** 457 * Perform iterative resolution. 458 * Automatically converges after first pass if no additional passes are needed. 459 * 460 * Architecture: 461 * - Pass 1: Build scope tree + initial name resolution 462 * - Pass 2-N: Reuse scope tree, collect evidence, infer columns, re-resolve names 463 * 464 * This separation allows: 465 * 1. Scopes to accumulate inferred columns across iterations 466 * 2. Later scopes to reference earlier scopes' inferred columns 467 * 3. 
Forward references to be resolved in subsequent passes 468 */ 469 private boolean performIterativeResolution() { 470 logInfo("Performing iterative resolution (max iterations: " + config.getMaxIterations() + ")"); 471 472 int passNumber = 1; 473 ResolutionStatistics previousStats = null; 474 boolean continueIterating = true; 475 boolean scopesBuilt = false; 476 477 while (continueIterating) { 478 logInfo("=== Pass " + passNumber + " ==="); 479 480 // Create a resolution pass 481 ResolutionPass pass = new ResolutionPass(passNumber, previousStats); 482 483 if (passNumber == 1) { 484 // ========== PASS 1: Build scope tree + initial resolution ========== 485 logInfo("Pass 1: Building scope tree using ScopeBuilder and performing initial resolution"); 486 487 // Clear all state for fresh start 488 resolutionContext.clear(); 489 columnToScopeMap.clear(); 490 fromScopeIndexCache.clear(); 491 dmlIndexCache.clear(); 492 teradataNamedAliasCache.clear(); 493 allColumnReferences.clear(); 494 495 // Use ScopeBuilder to build complete scope tree (handles all nesting correctly) 496 long scopeBuilderStart = System.currentTimeMillis(); 497 scopeBuildResult = scopeBuilder.build(sqlStatements); 498 499 // Get global scope from builder 500 globalScope = scopeBuildResult.getGlobalScope(); 501 502 // Copy column references and scope mappings from ScopeBuildResult 503 columnToScopeMap.putAll(scopeBuildResult.getColumnToScopeMap()); 504 allColumnReferences.addAll(scopeBuildResult.getAllColumnReferences()); 505 timeScopeBuilder += System.currentTimeMillis() - scopeBuilderStart; 506 507 logInfo("ScopeBuilder complete: " + scopeBuildResult.getStatistics()); 508 logInfo("Built " + scopeBuildResult.getStatementScopeMap().size() + " SelectScopes"); 509 510 // Initialize NamespaceEnhancer with scope tree (caches star namespaces) 511 namespaceEnhancer.initialize(scopeBuildResult); 512 namespaceEnhancer.startPass(passNumber); 513 514 // Get SET clause target columns that should not be re-resolved 
515 Set<TObjectName> setClauseTargetColumns = scopeBuilder.getSetClauseTargetColumns(); 516 517 // Get INSERT ALL target columns that should not be re-resolved 518 Set<TObjectName> insertAllTargetColumns = scopeBuilder.getInsertAllTargetColumns(); 519 520 // Get MERGE INSERT VALUES columns that need sourceTable restoration after resolution 521 Map<TObjectName, TTable> mergeInsertValuesColumns = scopeBuilder.getMergeInsertValuesColumns(); 522 523 // Perform initial name resolution for all collected columns 524 logInfo("Performing initial name resolution for " + allColumnReferences.size() + " column references"); 525 long nameResStart = System.currentTimeMillis(); 526 for (TObjectName objName : allColumnReferences) { 527 // Skip SET clause target columns - they already have sourceTable correctly set 528 // to the UPDATE target table and should NOT be resolved through star columns 529 if (setClauseTargetColumns.contains(objName)) { 530 continue; 531 } 532 533 // Skip INSERT ALL target columns - they already have sourceTable correctly set 534 // to the INSERT target table and should NOT be resolved against the subquery scope 535 if (insertAllTargetColumns.contains(objName)) { 536 continue; 537 } 538 539 IScope scope = columnToScopeMap.get(objName); 540 if (scope != null) { 541 nameResolver.resolve(objName, scope); 542 543 // Handle USING column priority for JOIN...USING syntax 544 handleUsingColumnResolution(objName); 545 546 // Handle Teradata NAMED alias resolution 547 handleTeradataNamedAliasResolution(objName); 548 handleQualifyClauseAliasResolution(objName); 549 550 // Handle subquery aliased/calculated column resolution 551 // Ensures aliased columns don't incorrectly trace to base tables 552 handleSubqueryAliasedColumnResolution(objName); 553 554 // Collect unresolved references for enhancement 555 collectForEnhancementIfNeeded(objName, scope); 556 } 557 } 558 559 // Restore sourceTable for MERGE INSERT VALUES columns after name resolution. 
560 // Name resolution may have set an AMBIGUOUS resolution (e.g., column 'product' 561 // appears in both target and source tables through the ON clause). In MERGE 562 // semantics, WHEN NOT MATCHED VALUES columns always reference the USING (source) 563 // table. 564 // 565 // For AMBIGUOUS resolution: clear it so getSourceTable() returns the actual field 566 // value (the USING table). AMBIGUOUS means the column was found in both target and 567 // source namespaces, but semantically it must reference the source. 568 // 569 // For EXACT_MATCH resolution: keep it because it contains star column push-down 570 // tracing info (e.g., when USING is a subquery with SELECT *, the resolution 571 // traces the VALUES column to the physical table inside the subquery). 572 for (Map.Entry<TObjectName, TTable> entry : mergeInsertValuesColumns.entrySet()) { 573 TObjectName col = entry.getKey(); 574 TTable usingTable = entry.getValue(); 575 ResolutionResult res = col.getResolution(); 576 if (res != null && res.isAmbiguous()) { 577 col.setResolution(null); 578 } 579 col.setSourceTable(usingTable); 580 } 581 582 timeNameResolution += System.currentTimeMillis() - nameResStart; 583 584 // Explicit Enhancement Phase: Add collected columns to namespaces 585 long enhanceStart = System.currentTimeMillis(); 586 EnhancementResult enhanceResult = namespaceEnhancer.enhance(); 587 timeEnhancement += System.currentTimeMillis() - enhanceStart; 588 logInfo("Pass 1 enhancement: " + enhanceResult.getTotalAdded() + " columns added to namespaces"); 589 590 scopesBuilt = true; 591 logInfo("Pass 1 complete. 
Resolved " + allColumnReferences.size() + " column references."); 592 593 594 } else { 595 // ========== PASS 2+: Explicit Enhancement + Re-resolve ========== 596 logInfo("Pass " + passNumber + ": Explicit namespace enhancement and re-resolution"); 597 598 // ======== Phase A: Start New Pass ======== 599 namespaceEnhancer.startPass(passNumber); 600 601 // ======== Phase B: Clear Resolution Results (keep scopes!) ======== 602 logInfo("Phase B: Clearing resolution results (scopes preserved)"); 603 resolutionContext.clear(); 604 605 // ======== Phase C: Re-resolve with Enhanced Namespaces ======== 606 logInfo("Phase C: Re-resolving with enhanced namespaces"); 607 608 // Get SET clause target columns that should not be re-resolved 609 Set<TObjectName> setClauseTargetColumns = scopeBuilder.getSetClauseTargetColumns(); 610 611 // Get INSERT ALL target columns that should not be re-resolved 612 Set<TObjectName> insertAllTargetColumns = scopeBuilder.getInsertAllTargetColumns(); 613 614 // Get MERGE INSERT VALUES columns that need sourceTable restoration after resolution 615 Map<TObjectName, TTable> mergeInsertValuesColumns = scopeBuilder.getMergeInsertValuesColumns(); 616 617 // Re-resolve all column references using their original scopes 618 // Scopes are reused from Pass 1, but namespaces may have been enhanced 619 for (TObjectName objName : allColumnReferences) { 620 // Skip SET clause target columns - they already have sourceTable correctly set 621 // to the UPDATE target table and should NOT be resolved through star columns 622 if (setClauseTargetColumns.contains(objName)) { 623 continue; 624 } 625 626 // Skip INSERT ALL target columns - they already have sourceTable correctly set 627 // to the INSERT target table and should NOT be resolved against the subquery scope 628 if (insertAllTargetColumns.contains(objName)) { 629 continue; 630 } 631 632 IScope scope = columnToScopeMap.get(objName); 633 if (scope != null) { 634 nameResolver.resolve(objName, scope); 635 636 // 
Handle USING column priority for JOIN...USING syntax 637 handleUsingColumnResolution(objName); 638 639 // Handle Teradata NAMED alias resolution 640 handleTeradataNamedAliasResolution(objName); 641 handleQualifyClauseAliasResolution(objName); 642 643 // Handle subquery aliased/calculated column resolution 644 // Ensures aliased columns don't incorrectly trace to base tables 645 handleSubqueryAliasedColumnResolution(objName); 646 647 // Collect for next enhancement pass if still targets star namespace 648 collectForEnhancementIfNeeded(objName, scope); 649 } 650 } 651 652 // Restore sourceTable for MERGE INSERT VALUES columns after re-resolution 653 for (Map.Entry<TObjectName, TTable> entry : mergeInsertValuesColumns.entrySet()) { 654 ResolutionResult res = entry.getKey().getResolution(); 655 if (res != null && res.isAmbiguous()) { 656 entry.getKey().setResolution(null); 657 } 658 entry.getKey().setSourceTable(entry.getValue()); 659 } 660 661 // ======== Phase D: Explicit Namespace Enhancement ======== 662 logInfo("Phase D: Explicit namespace enhancement"); 663 EnhancementResult enhanceResult = namespaceEnhancer.enhance(); 664 logInfo("Pass " + passNumber + " enhancement: " + 665 enhanceResult.getTotalAdded() + " columns added, " + 666 enhanceResult.getTotalSkipped() + " skipped (existing)"); 667 668 // Legacy support: also run old evidence collection (if needed) 669 if (config.isEvidenceCollectionEnabled()) { 670 runLegacyEvidenceCollection(); 671 } 672 } 673 674 // Get statistics after this pass 675 ResolutionStatistics currentStats = getStatistics(); 676 pass.complete(currentStats); 677 678 // Record this pass 679 convergenceDetector.recordPass(pass); 680 passHistory.add(pass); 681 682 logInfo(pass.getSummary()); 683 684 // Check convergence 685 ConvergenceDetector.ConvergenceResult convergence = convergenceDetector.checkConvergence(); 686 if (convergence.hasConverged()) { 687 logInfo("Convergence detected: " + convergence.getReason()); 688 
pass.setStopReason(convergence.getReason()); 689 continueIterating = false; 690 } else { 691 // Prepare for next pass 692 previousStats = currentStats; 693 passNumber++; 694 } 695 } 696 697 // Create cloned columns for star column tracing 698 // This is a CORE part of TSQLResolver2 - when a column traces through a CTE/subquery 699 // with SELECT * to a physical table, we create a cloned TObjectName with sourceTable 700 // pointing to the traced physical table. This ensures complete lineage tracking. 701 createTracedColumnClones(); 702 703 // Sync to legacy structures if enabled 704 if (config.isLegacyCompatibilityEnabled()) { 705 long syncStart = System.currentTimeMillis(); 706 syncToLegacyStructures(); 707 timeLegacySync += System.currentTimeMillis() - syncStart; 708 } 709 710 // Print final statistics 711 logInfo("Iterative resolution complete after " + passHistory.size() + " passes"); 712 ResolutionStatistics finalStats = getStatistics(); 713 logInfo("Final statistics: " + finalStats); 714 715 // Print namespace enhancement summary if in debug mode 716 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 717 logInfo("=== Namespace Enhancement Summary ==="); 718 logInfo("Total columns added: " + namespaceEnhancer.getTotalColumnsAdded()); 719 } 720 721 // Print performance timing breakdown 722 logInfo(getPerformanceTimings()); 723 724 // Accumulate to global timings for profiling 725 globalTimeScopeBuilder += timeScopeBuilder; 726 globalTimeNameResolution += timeNameResolution; 727 globalTimeEnhancement += timeEnhancement; 728 globalTimeLegacySync += timeLegacySync; 729 globalTimeOther += timeOther; 730 globalResolveCount++; 731 732 return true; 733 } 734 735 /** 736 * Run legacy evidence collection (deprecated). 737 * Kept for backward compatibility. 
738 */ 739 @SuppressWarnings("deprecation") 740 private void runLegacyEvidenceCollection() { 741 logInfo("Running legacy evidence collection (deprecated)"); 742 743 gudusoft.gsqlparser.resolver2.inference.EvidenceCollector evidenceCollector = 744 new gudusoft.gsqlparser.resolver2.inference.EvidenceCollector(); 745 746 int evidenceCount = 0; 747 for (int i = 0; i < sqlStatements.size(); i++) { 748 Object stmt = sqlStatements.get(i); 749 if (stmt instanceof TSelectSqlStatement) { 750 List<gudusoft.gsqlparser.resolver2.inference.InferenceEvidence> stmtEvidence = 751 evidenceCollector.collectFromSelect((TSelectSqlStatement) stmt); 752 evidenceCount += stmtEvidence.size(); 753 } 754 } 755 756 logInfo("Legacy evidence collection: " + evidenceCount + " items"); 757 } 758 759 /** 760 * Get the namespace enhancer for external access to enhancement history. 761 * 762 * @return the namespace enhancer 763 */ 764 public NamespaceEnhancer getNamespaceEnhancer() { 765 return namespaceEnhancer; 766 } 767 768 /** 769 * Get a detailed enhancement report. 770 * 771 * @return detailed report string 772 */ 773 public String getEnhancementReport() { 774 return namespaceEnhancer.generateReport(); 775 } 776 777 /** 778 * Re-process a statement for name resolution only (without rebuilding scopes). 779 * This is used in Pass 2+ to re-resolve names using enhanced scopes. 
780 * 781 * CRITICAL (Principle 1: Scope完全复用): 782 * - Scope tree is built ONCE in Pass 1 and completely reused in Pass 2+ 783 * - This method MUST NOT call processStatement() which rebuilds scopes 784 * - Instead, it iterates through allColumnReferences and re-resolves each 785 * column using its original scope from columnToScopeMap 786 * 787 * This allows: 788 * - Namespaces to be enhanced across iterations (Principle 2) 789 * - Star columns to benefit from reverse inference (Principle 3) 790 * - All previous inference results to be preserved 791 */ 792 private void reprocessStatementNamesOnly(Object statement) { 793 logDebug("Re-resolving column references without rebuilding scopes"); 794 795 // Re-resolve all column references using their original scopes 796 // The scopes are reused from Pass 1, but their namespaces may have been enhanced 797 for (TObjectName objName : allColumnReferences) { 798 IScope scope = columnToScopeMap.get(objName); 799 if (scope != null) { 800 // Re-resolve this column using the (potentially enhanced) scope 801 nameResolver.resolve(objName, scope); 802 803 // Handle USING column priority for JOIN...USING syntax 804 handleUsingColumnResolution(objName); 805 806 // Handle Teradata NAMED alias resolution 807 handleTeradataNamedAliasResolution(objName); 808 handleQualifyClauseAliasResolution(objName); 809 810 // Collect for next enhancement pass if still unresolved 811 collectForEnhancementIfNeeded(objName, scope); 812 } else { 813 logError("No scope found for column: " + objName); 814 } 815 } 816 } 817 818 /** 819 * Handle special resolution for USING columns in JOIN...USING syntax. 820 * In "a JOIN table2 USING (id)", the USING column exists in BOTH tables. 
821 * - The synthetic column (clone) resolves to the right-side table (table2) 822 * - The original USING column resolves to the left-side table (a) 823 * 824 * @param objName The column reference 825 */ 826 private void handleUsingColumnResolution(TObjectName objName) { 827 if (objName == null || scopeBuildResult == null) return; 828 829 // Check if this is a synthetic USING column (should resolve to right table) 830 TTable rightTable = scopeBuildResult.getUsingColumnRightTable(objName); 831 if (rightTable != null) { 832 // This is the synthetic USING column - set its sourceTable to the right-side table 833 objName.setSourceTable(rightTable); 834 835 // Create a proper resolution with the right-side table 836 gudusoft.gsqlparser.resolver2.model.ColumnSource source = 837 new gudusoft.gsqlparser.resolver2.model.ColumnSource( 838 null, // no namespace for USING columns 839 objName.getColumnNameOnly(), 840 null, // no definition node 841 1.0, // high confidence 842 "using_column_right", 843 rightTable // override table - the right-side table of the JOIN 844 ); 845 gudusoft.gsqlparser.resolver2.model.ResolutionResult result = 846 gudusoft.gsqlparser.resolver2.model.ResolutionResult.exactMatch(source); 847 848 // Update the TObjectName's resolution so formatter uses correct finalTable 849 objName.setResolution(result); 850 851 // Also register in ResolutionContext so getReferencesTo(table) can find it 852 resolutionContext.registerResolution(objName, result); 853 854 logDebug("USING column " + objName.getColumnNameOnly() + 855 " -> right-side table " + rightTable.getName()); 856 return; 857 } 858 859 // Check if this is the original USING column (should resolve to left table) 860 TTable leftTable = scopeBuildResult.getUsingColumnLeftTable(objName); 861 if (leftTable != null) { 862 // This is the original USING column - set its sourceTable to the left-side table 863 objName.setSourceTable(leftTable); 864 865 // Create a proper resolution with the left-side table 866 
gudusoft.gsqlparser.resolver2.model.ColumnSource source = 867 new gudusoft.gsqlparser.resolver2.model.ColumnSource( 868 null, // no namespace for USING columns 869 objName.getColumnNameOnly(), 870 null, // no definition node 871 1.0, // high confidence 872 "using_column_left", 873 leftTable // override table - the left-side table of the JOIN 874 ); 875 gudusoft.gsqlparser.resolver2.model.ResolutionResult result = 876 gudusoft.gsqlparser.resolver2.model.ResolutionResult.exactMatch(source); 877 878 // Update the TObjectName's resolution so formatter uses correct finalTable 879 objName.setResolution(result); 880 881 // Also register in ResolutionContext so getReferencesTo(table) can find it 882 resolutionContext.registerResolution(objName, result); 883 884 logDebug("USING column " + objName.getColumnNameOnly() + 885 " -> left-side table " + leftTable.getName()); 886 } 887 } 888 889 /** 890 * Handle Teradata NAMED alias resolution. 891 * 892 * <p>In Teradata, NAMED aliases defined in the SELECT list (using the {@code (NAMED alias)} syntax) 893 * can be referenced in the WHERE and QUALIFY clauses of the same SELECT statement. This is different 894 * from standard SQL where column aliases are only visible in ORDER BY.</p> 895 * 896 * <p>This method checks if a resolved column matches a NAMED alias from the enclosing SELECT list. 
897 * If it does, the resolution is updated to indicate this is a calculated column (alias), not a 898 * physical column from the table.</p> 899 * 900 * <p>Example:</p> 901 * <pre> 902 * SELECT USI_ID, SUBS_ID, 903 * (CAST(:param AS TIMESTAMP(0)))(NAMED REPORT_DTTM) 904 * FROM PRD2_ODW.SUBS_USI_HISTORY 905 * WHERE stime <= REPORT_DTTM AND etime > REPORT_DTTM 906 * </pre> 907 * <p>Here, REPORT_DTTM references in WHERE should NOT be linked to PRD2_ODW.SUBS_USI_HISTORY 908 * because REPORT_DTTM is a NAMED alias, not a physical column.</p> 909 * 910 * @param objName The column reference to check 911 */ 912 private void handleTeradataNamedAliasResolution(TObjectName objName) { 913 if (objName == null || sqlStatements == null || sqlStatements.size() == 0) return; 914 915 // Only applies to Teradata 916 EDbVendor dbVendor = sqlStatements.get(0).dbvendor; 917 if (dbVendor != EDbVendor.dbvteradata) return; 918 919 String columnName = objName.getColumnNameOnly(); 920 if (columnName == null || columnName.isEmpty()) return; 921 922 // Only apply to UNQUALIFIED column references (no table prefix) 923 // If a column has a table qualifier like "CP.CALC_PLATFORM_ID", it's clearly 924 // referencing a specific table's column, not a NAMED alias 925 if (objName.getTableToken() != null) return; 926 927 // Get the scope for this column reference 928 IScope scope = columnToScopeMap.get(objName); 929 if (scope == null) return; 930 931 // Find the enclosing SELECT statement from the scope 932 TSelectSqlStatement enclosingSelect = findEnclosingSelectFromScope(scope); 933 if (enclosingSelect == null) return; 934 935 // Optimization C: Use cached index for O(1) lookup instead of O(N) iteration 936 Map<String, TResultColumn> aliasIndex = getTeradataNamedAliasIndex(enclosingSelect); 937 if (aliasIndex == null || aliasIndex.isEmpty()) return; 938 939 // S2: look up via the vendor-aware key. 
Teradata is case-insensitive 940 // for unquoted column aliases but case-sensitive for quoted ones, so 941 // raw {@code toLowerCase()} drops quoted-alias correctness. 942 // 943 // CRITICAL — vendor source-of-truth invariant (codex round-2 review): 944 // both this lookup and the storage path in 945 // {@link #getTeradataNamedAliasIndex} read from the SAME source 946 // {@code sqlStatements.get(0).dbvendor}: this method's local 947 // {@code dbVendor} variable is initialized from it at line 916 948 // above, and the storage path passes the same value. They cannot 949 // diverge. 950 String aliasKey = gudusoft.gsqlparser.sqlenv.IdentifierService.normalizeStatic( 951 dbVendor, 952 gudusoft.gsqlparser.sqlenv.ESQLDataObjectType.dotColumn, 953 columnName); 954 TResultColumn resultCol = aliasIndex.get(aliasKey); 955 if (resultCol == null) { 956 // Fallback: quoted-vs-unquoted mix in the same SELECT (rare). 957 // Walk the cached map with a vendor-aware compare so the 958 // normalized-key fast probe miss does not become a false negative. 
959 for (Map.Entry<String, TResultColumn> entry : aliasIndex.entrySet()) { 960 if (gudusoft.gsqlparser.sqlenv.IdentifierService.areEqualStatic( 961 dbVendor, 962 gudusoft.gsqlparser.sqlenv.ESQLDataObjectType.dotColumn, 963 entry.getKey(), 964 columnName)) { 965 resultCol = entry.getValue(); 966 break; 967 } 968 } 969 } 970 if (resultCol == null) return; 971 972 // Skip if objName is part of this result column's expression 973 // This handles cases like "CAST(ID AS DECIMAL) AS ID" where the ID inside 974 // CAST is the source column, not a reference to the ID alias 975 if (isColumnWithinResultColumn(objName, resultCol)) { 976 return; 977 } 978 979 // Found a matching NAMED alias 980 // Clear the source table since this is an alias, not a physical column 981 objName.setSourceTable(null); 982 983 // Create a new ColumnSource with the TResultColumn as the definition node 984 // This will make isCalculatedColumn() return true 985 ColumnSource source = new ColumnSource( 986 null, // namespace - not from a table 987 columnName, 988 resultCol, // definition node - the TResultColumn with the alias 989 1.0, // high confidence 990 "teradata_named_alias" 991 ); 992 ResolutionResult result = ResolutionResult.exactMatch(source); 993 objName.setResolution(result); 994 resolutionContext.registerResolution(objName, result); 995 996 logDebug("Teradata NAMED alias: " + columnName + " -> alias from SELECT list"); 997 } 998 999 /** 1000 * Handle QUALIFY clause alias resolution for Snowflake, BigQuery, and Databricks. 1001 * 1002 * <p>In Snowflake, BigQuery, and Databricks, column aliases defined in the SELECT list 1003 * can be referenced in the QUALIFY clause. This is different from standard SQL where 1004 * column aliases are only visible in ORDER BY.</p> 1005 * 1006 * <p>This method checks if a column reference in the QUALIFY clause matches an alias 1007 * from the enclosing SELECT list. 
If it does, the resolution is updated to indicate 1008 * this is a calculated column (alias), not a physical column from the table.</p> 1009 * 1010 * <p>Example:</p> 1011 * <pre> 1012 * SELECT RoomNumber, RoomType, BlockFloor, 1013 * ROW_NUMBER() OVER (PARTITION BY RoomType ORDER BY BlockFloor) AS row_num 1014 * FROM Hospital.Room 1015 * QUALIFY row_num = 1 1016 * </pre> 1017 * <p>Here, row_num in QUALIFY should NOT be linked to Hospital.Room because 1018 * row_num is an alias for the window function, not a physical column.</p> 1019 * 1020 * @param objName The column reference to check 1021 */ 1022 private void handleQualifyClauseAliasResolution(TObjectName objName) { 1023 if (objName == null || sqlStatements == null || sqlStatements.size() == 0) return; 1024 1025 // Only applies to databases that support QUALIFY with alias visibility 1026 EDbVendor dbVendor = sqlStatements.get(0).dbvendor; 1027 if (dbVendor != EDbVendor.dbvsnowflake && 1028 dbVendor != EDbVendor.dbvbigquery && 1029 dbVendor != EDbVendor.dbvdatabricks) return; 1030 1031 String columnName = objName.getColumnNameOnly(); 1032 if (columnName == null || columnName.isEmpty()) return; 1033 1034 // Only apply to UNQUALIFIED column references (no table prefix) 1035 if (objName.getTableToken() != null) return; 1036 1037 // Check if this column is within a QUALIFY clause 1038 if (!isInQualifyClause(objName)) return; 1039 1040 // Get the scope for this column reference 1041 IScope scope = columnToScopeMap.get(objName); 1042 if (scope == null) return; 1043 1044 // Find the enclosing SELECT statement from the scope 1045 TSelectSqlStatement enclosingSelect = findEnclosingSelectFromScope(scope); 1046 if (enclosingSelect == null) return; 1047 1048 // Look for a matching alias in the SELECT list 1049 TResultColumnList resultColumns = enclosingSelect.getResultColumnList(); 1050 if (resultColumns == null || resultColumns.size() == 0) return; 1051 1052 TResultColumn matchingResultCol = null; 1053 for (int i = 0; i < 
resultColumns.size(); i++) { 1054 TResultColumn resultCol = resultColumns.getResultColumn(i); 1055 if (resultCol == null) continue; 1056 1057 // Check if this result column has an alias matching the column name 1058 if (resultCol.getAliasClause() != null && 1059 resultCol.getAliasClause().getAliasName() != null) { 1060 String aliasName = resultCol.getAliasClause().getAliasName().toString(); 1061 if (aliasName != null && aliasName.equalsIgnoreCase(columnName)) { 1062 matchingResultCol = resultCol; 1063 break; 1064 } 1065 } 1066 } 1067 1068 if (matchingResultCol == null) return; 1069 1070 // Found a matching alias - clear the source table since this is an alias, not a physical column 1071 objName.setSourceTable(null); 1072 1073 // Create a new ColumnSource with the TResultColumn as the definition node 1074 // This will make isCalculatedColumn() return true 1075 ColumnSource source = new ColumnSource( 1076 null, // namespace - not from a table 1077 columnName, 1078 matchingResultCol, // definition node - the TResultColumn with the alias 1079 1.0, // high confidence 1080 "qualify_clause_alias" 1081 ); 1082 ResolutionResult result = ResolutionResult.exactMatch(source); 1083 objName.setResolution(result); 1084 resolutionContext.registerResolution(objName, result); 1085 1086 logDebug("QUALIFY clause alias: " + columnName + " -> alias from SELECT list"); 1087 } 1088 1089 /** 1090 * Check if a column reference is within a QUALIFY clause. 
1091 * 1092 * @param objName The column reference to check 1093 * @return true if the column is within a QUALIFY clause 1094 */ 1095 private boolean isInQualifyClause(TObjectName objName) { 1096 if (objName == null) return false; 1097 1098 // Get the column's scope to find the enclosing SELECT statement 1099 IScope scope = columnToScopeMap.get(objName); 1100 if (scope == null) return false; 1101 1102 TSelectSqlStatement enclosingSelect = findEnclosingSelectFromScope(scope); 1103 if (enclosingSelect == null) return false; 1104 1105 // Check if this SELECT has a QUALIFY clause 1106 TQualifyClause qualifyClause = enclosingSelect.getQualifyClause(); 1107 if (qualifyClause == null) return false; 1108 1109 // Check if the column's token position is within the QUALIFY clause's range 1110 if (objName.getStartToken() != null && qualifyClause.getStartToken() != null && 1111 qualifyClause.getEndToken() != null) { 1112 long objPos = objName.getStartToken().posinlist; 1113 long qualifyStart = qualifyClause.getStartToken().posinlist; 1114 long qualifyEnd = qualifyClause.getEndToken().posinlist; 1115 1116 return objPos >= qualifyStart && objPos <= qualifyEnd; 1117 } 1118 1119 return false; 1120 } 1121 1122 /** 1123 * Gets or builds the Teradata NAMED alias index for a SELECT statement. 1124 * Optimization C: Caches the alias map for O(1) lookup instead of O(N) iteration. 
1125 * 1126 * @param selectStmt The SELECT statement to get/build the index for 1127 * @return Map from lowercase alias name to TResultColumn, or null if no aliases 1128 */ 1129 private Map<String, TResultColumn> getTeradataNamedAliasIndex(TSelectSqlStatement selectStmt) { 1130 if (selectStmt == null) return null; 1131 1132 // Check cache first 1133 Map<String, TResultColumn> index = teradataNamedAliasCache.get(selectStmt); 1134 if (index != null) { 1135 return index; 1136 } 1137 1138 // Build index for this SELECT statement 1139 TResultColumnList resultColumns = selectStmt.getResultColumnList(); 1140 if (resultColumns == null || resultColumns.size() == 0) { 1141 // Cache empty map to avoid rebuilding 1142 index = java.util.Collections.emptyMap(); 1143 teradataNamedAliasCache.put(selectStmt, index); 1144 return index; 1145 } 1146 1147 index = new java.util.LinkedHashMap<>(); 1148 // S2: store keys via the vendor-aware identifier normalizer so the 1149 // index honors quoted-vs-unquoted distinctions on Teradata. 1150 // 1151 // CRITICAL — vendor source-of-truth invariant (codex round-1+2): 1152 // we read {@code sqlStatements.get(0).dbvendor} here, which is the 1153 // SAME source the lookup at 1154 // {@link #handleTeradataNamedAliasResolution} reads (line 916, 1155 // also gated on {@code dbVendor == EDbVendor.dbvteradata}). Storage 1156 // and lookup share this single source so the index keys built here 1157 // and the lookup keys produced there are guaranteed to use 1158 // identical vendor rules. The fall-through to 1159 // {@code selectStmt.dbvendor} is purely defensive and only fires if 1160 // the resolver's statement list is unexpectedly empty. 1161 EDbVendor indexVendor = (sqlStatements != null && sqlStatements.size() > 0 1162 && sqlStatements.get(0).dbvendor != null) 1163 ? sqlStatements.get(0).dbvendor 1164 : (selectStmt.dbvendor != null ? 
selectStmt.dbvendor : EDbVendor.dbvgeneric); 1165 for (int i = 0; i < resultColumns.size(); i++) { 1166 TResultColumn resultCol = resultColumns.getResultColumn(i); 1167 if (resultCol == null) continue; 1168 1169 // Check if this result column has a NAMED alias 1170 if (resultCol.getAliasClause() != null && 1171 resultCol.getAliasClause().getAliasName() != null) { 1172 String aliasName = resultCol.getAliasClause().getAliasName().toString(); 1173 if (aliasName != null && !aliasName.isEmpty()) { 1174 String key = gudusoft.gsqlparser.sqlenv.IdentifierService.normalizeStatic( 1175 indexVendor, 1176 gudusoft.gsqlparser.sqlenv.ESQLDataObjectType.dotColumn, 1177 aliasName); 1178 index.put(key, resultCol); 1179 } 1180 } 1181 } 1182 1183 // Cache the index (even if empty, to avoid rebuilding) 1184 teradataNamedAliasCache.put(selectStmt, index); 1185 1186 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && !index.isEmpty()) { 1187 logDebug("Built Teradata NAMED alias index for SELECT with " + index.size() + " aliases"); 1188 } 1189 1190 return index; 1191 } 1192 1193 /** 1194 * Check if a column reference (TObjectName) is within a result column's expression. 1195 * This is used to prevent treating source columns in expressions like "CAST(ID AS DECIMAL) AS ID" 1196 * as references to the alias. 1197 * 1198 * @param objName The column reference to check 1199 * @param resultCol The result column to check against 1200 * @return true if objName is within resultCol's expression tree 1201 */ 1202 private boolean isColumnWithinResultColumn(TObjectName objName, TResultColumn resultCol) { 1203 if (objName == null || resultCol == null) return false; 1204 1205 // Get the expression of the result column 1206 TExpression expr = resultCol.getExpr(); 1207 if (expr == null) return false; 1208 1209 // Check by comparing start/end positions 1210 // If objName's position is within resultCol's expression, it's part of it 1211 long objStart = objName.getStartToken() != null ? 
objName.getStartToken().posinlist : -1; 1212 long objEnd = objName.getEndToken() != null ? objName.getEndToken().posinlist : -1; 1213 long exprStart = expr.getStartToken() != null ? expr.getStartToken().posinlist : -1; 1214 long exprEnd = expr.getEndToken() != null ? expr.getEndToken().posinlist : -1; 1215 1216 if (objStart >= 0 && exprStart >= 0 && objEnd >= 0 && exprEnd >= 0) { 1217 return objStart >= exprStart && objEnd <= exprEnd; 1218 } 1219 1220 return false; 1221 } 1222 1223 /** 1224 * Handle subquery aliased/calculated column resolution. 1225 * 1226 * <p>When a column reference resolves through a subquery (or CTE containing subqueries), 1227 * and the underlying column is an alias or calculated expression, we should NOT trace 1228 * it to the base table. This method ensures that such columns have their sourceTable 1229 * cleared to prevent incorrect attribution.</p> 1230 * 1231 * <p>This is essential for queries like:</p> 1232 * <pre> 1233 * WITH DataCTE AS ( 1234 * SELECT t.col, COUNT(*) AS cnt FROM table1 t ... 
1235 * ) 1236 * SELECT * FROM DataCTE 1237 * </pre> 1238 * <p>The 'cnt' column should NOT be traced to 'table1' because it's a calculated column.</p> 1239 * 1240 * @param objName The column reference to check 1241 */ 1242 private void handleSubqueryAliasedColumnResolution(TObjectName objName) { 1243 if (objName == null) return; 1244 1245 // Check if column has a table qualifier pointing to a subquery/CTE 1246 // If so, we should KEEP the sourceTable link for lineage tracing 1247 // The qualifier explicitly tells us which subquery the column belongs to 1248 String tableQualifier = objName.getTableString(); 1249 if (tableQualifier != null && !tableQualifier.isEmpty()) { 1250 IScope scope = columnToScopeMap.get(objName); 1251 if (scope != null) { 1252 TTable qualifiedTable = findTableByQualifier(scope, tableQualifier); 1253 if (qualifiedTable != null && 1254 (qualifiedTable.getSubquery() != null || qualifiedTable.getCTE() != null)) { 1255 // Column has qualifier pointing to a subquery/CTE 1256 // Keep the sourceTable link for lineage tracing (e.g., a.num_emp -> subquery a) 1257 // Don't clear sourceTable - this link is correct and needed 1258 logDebug("Subquery/CTE qualified column: " + objName.toString() + 1259 " - keeping sourceTable link to " + tableQualifier); 1260 return; 1261 } 1262 } 1263 } 1264 1265 // For unqualified columns (or columns qualified with base tables), 1266 // check if this is a calculated column or alias that should not trace to base tables 1267 ColumnSource source = objName.getColumnSource(); 1268 if (source != null) { 1269 if (source.isCalculatedColumn() || source.isColumnAlias()) { 1270 TTable currentSource = objName.getSourceTable(); 1271 if (currentSource != null) { 1272 // Only clear if sourceTable is a base table (not subquery/CTE) 1273 // For subquery/CTE references, keep the link for lineage tracing 1274 if (currentSource.getSubquery() == null && currentSource.getCTE() == null) { 1275 objName.setSourceTable(null); 1276 
logDebug("Calculated/alias column: " + objName.getColumnNameOnly() + 1277 " cleared sourceTable (was " + currentSource.getName() + ") - not linked to base table"); 1278 } 1279 } 1280 } 1281 } 1282 } 1283 1284 /** 1285 * Gets or builds the FromScopeIndex for a scope (Performance Optimization B). 1286 * 1287 * <p>This method implements lazy initialization: the index is built on first access 1288 * and cached for subsequent lookups within the same resolution pass.</p> 1289 * 1290 * @param scope The scope to get the index for (SelectScope, UpdateScope, or FromScope) 1291 * @return The cached or newly built FromScopeIndex, or null if scope has no FROM clause 1292 */ 1293 private FromScopeIndex getFromScopeIndex(IScope scope) { 1294 if (scope == null) { 1295 return null; 1296 } 1297 1298 // Get the actual FromScope to use as cache key 1299 IScope fromScope = null; 1300 if (scope instanceof SelectScope) { 1301 fromScope = ((SelectScope) scope).getFromScope(); 1302 } else if (scope instanceof gudusoft.gsqlparser.resolver2.scope.UpdateScope) { 1303 fromScope = ((gudusoft.gsqlparser.resolver2.scope.UpdateScope) scope).getFromScope(); 1304 } else if (scope instanceof FromScope) { 1305 fromScope = scope; 1306 } 1307 1308 if (fromScope == null) { 1309 return null; 1310 } 1311 1312 // Check cache first (lazy initialization) 1313 FromScopeIndex index = fromScopeIndexCache.get(fromScope); 1314 if (index == null) { 1315 // S2: thread the GlobalScope's matcher into the index so per-vendor 1316 // identifier rules govern alias / table-name lookups (BigQuery 1317 // tables sensitive, Oracle / Postgres quoted sensitive, etc.). 1318 gudusoft.gsqlparser.resolver2.matcher.INameMatcher matcher = 1319 globalScope != null ? 
globalScope.getNameMatcher() : null; 1320 // Build index and cache it 1321 index = new FromScopeIndex(fromScope.getChildren(), matcher); 1322 fromScopeIndexCache.put(fromScope, index); 1323 1324 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 1325 logDebug("Built FromScopeIndex for scope: " + index); 1326 } 1327 } 1328 1329 return index; 1330 } 1331 1332 /** 1333 * Find a table by its qualifier (alias or name) in the scope. 1334 * Uses FromScopeIndex for O(1) lookup instead of O(N) linear scan. 1335 */ 1336 private TTable findTableByQualifier(IScope scope, String qualifier) { 1337 if (scope == null || qualifier == null) return null; 1338 1339 // Use indexed lookup (Performance Optimization B) 1340 FromScopeIndex index = getFromScopeIndex(scope); 1341 if (index != null) { 1342 return index.findTableByQualifier(qualifier); 1343 } 1344 1345 return null; 1346 } 1347 1348 /** 1349 * Check if a column name is an alias (not a passthrough column) in the subquery. 1350 */ 1351 private boolean isColumnAnAliasInSubquery(TSelectSqlStatement subquery, String columnName) { 1352 if (subquery == null || columnName == null) return false; 1353 1354 TResultColumnList resultCols = subquery.getResultColumnList(); 1355 if (resultCols == null) return false; 1356 1357 for (int i = 0; i < resultCols.size(); i++) { 1358 TResultColumn rc = resultCols.getResultColumn(i); 1359 if (rc == null) continue; 1360 1361 // Check if this result column has an alias matching the column name 1362 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1363 String alias = rc.getAliasClause().getAliasName().toString(); 1364 if (alias != null && alias.equalsIgnoreCase(columnName)) { 1365 // Found matching alias - check if it's a calculated column 1366 TExpression expr = rc.getExpr(); 1367 if (expr != null) { 1368 // Not a simple column reference = calculated 1369 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1370 return true; 1371 } 1372 } 1373 } 1374 } 1375 
1376 // Also check for SQL Server proprietary alias syntax: alias = expr 1377 // In this case, the alias is the column name itself 1378 String colName = getResultColumnName(rc); 1379 if (colName != null && colName.equalsIgnoreCase(columnName)) { 1380 TExpression expr = rc.getExpr(); 1381 if (expr != null && expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1382 return true; 1383 } 1384 } 1385 } 1386 return false; 1387 } 1388 1389 /** 1390 * Get the column name from a result column (handles aliases and SQL Server proprietary syntax). 1391 */ 1392 private String getResultColumnName(TResultColumn rc) { 1393 if (rc == null) return null; 1394 1395 // Check for explicit alias 1396 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1397 return rc.getAliasClause().getAliasName().toString(); 1398 } 1399 1400 // Check for SQL Server proprietary alias: alias = expr 1401 // In this case, the expression itself contains the alias 1402 TExpression expr = rc.getExpr(); 1403 if (expr != null && expr.getExpressionType() == EExpressionType.assignment_t) { 1404 // The left side is the alias 1405 if (expr.getLeftOperand() != null && expr.getLeftOperand().getObjectOperand() != null) { 1406 return expr.getLeftOperand().getObjectOperand().toString(); 1407 } 1408 } 1409 1410 return null; 1411 } 1412 1413 /** 1414 * Find the enclosing SELECT statement from a scope. 1415 * Traverses up the scope hierarchy to find a SelectScope and gets its node. 
1416 * 1417 * @param scope The scope to start from 1418 * @return The enclosing SELECT statement, or null if not found 1419 */ 1420 private TSelectSqlStatement findEnclosingSelectFromScope(IScope scope) { 1421 if (scope == null) return null; 1422 1423 IScope currentScope = scope; 1424 int maxIterations = 100; // Prevent infinite loops 1425 int iterations = 0; 1426 1427 while (currentScope != null && iterations < maxIterations) { 1428 iterations++; 1429 1430 // Check if current scope is a SelectScope 1431 if (currentScope instanceof SelectScope) { 1432 TParseTreeNode node = currentScope.getNode(); 1433 if (node instanceof TSelectSqlStatement) { 1434 return (TSelectSqlStatement) node; 1435 } 1436 } 1437 1438 // Move up to parent scope 1439 currentScope = currentScope.getParent(); 1440 } 1441 return null; 1442 } 1443 1444 /** 1445 * Collect a column reference for namespace enhancement if it targets a star namespace. 1446 * This is called during resolution to gather columns that need to be added to namespaces. 1447 * 1448 * @param objName The column reference 1449 * @param scope The scope where the column should be resolved 1450 */ 1451 private void collectForEnhancementIfNeeded(TObjectName objName, IScope scope) { 1452 if (objName == null || scope == null) return; 1453 1454 String columnName = objName.getColumnNameOnly(); 1455 if (columnName == null || columnName.isEmpty()) return; 1456 1457 // Get the resolution result to check status 1458 gudusoft.gsqlparser.resolver2.model.ResolutionResult result = objName.getResolution(); 1459 1460 // Find candidate namespace from scope's FROM clause 1461 INamespace candidateNamespace = findCandidateNamespace(objName, scope); 1462 1463 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 1464 logInfo("[TSQLResolver2] collectForEnhancement: column=" + columnName + 1465 ", candidateNs=" + (candidateNamespace != null ? candidateNamespace.getDisplayName() : "null") + 1466 ", hasStar=" + (candidateNamespace != null ? 
candidateNamespace.hasStarColumn() : "N/A")); 1467 } 1468 1469 if (candidateNamespace != null) { 1470 // Determine confidence based on context 1471 double confidence = 0.7; // Default for unqualified reference 1472 String evidence = "outer_reference"; 1473 1474 // Higher confidence for qualified references (e.g., "a.column") 1475 if (objName.getTableToken() != null) { 1476 confidence = 0.9; 1477 evidence = "qualified_reference"; 1478 } 1479 1480 // Collect for enhancement 1481 namespaceEnhancer.collectColumnRef( 1482 columnName, 1483 candidateNamespace, 1484 objName, 1485 confidence, 1486 evidence 1487 ); 1488 } 1489 } 1490 1491 /** 1492 * Find the candidate namespace for a column reference. 1493 * Looks at the scope's FROM clause to find namespaces with star columns. 1494 * Uses FromScopeIndex for O(1) lookup instead of O(N) linear scan. 1495 */ 1496 private INamespace findCandidateNamespace(TObjectName objName, IScope scope) { 1497 // Use indexed lookup (Performance Optimization B) 1498 FromScopeIndex index = getFromScopeIndex(scope); 1499 if (index == null) { 1500 return null; 1501 } 1502 1503 String tablePrefix = objName.getTableToken() != null ? 1504 objName.getTableToken().toString() : null; 1505 1506 return index.findCandidateNamespace(tablePrefix); 1507 } 1508 1509 /** 1510 * Delta 1: Collect metadata from DDL statements in the batch. 1511 * 1512 * If no SQLEnv is provided, this method extracts table/column metadata 1513 * from CREATE TABLE and CREATE VIEW statements in the SQL batch and 1514 * creates a TSQLEnv for use during resolution. 1515 * 1516 * This enables standalone resolution of SQL batches that contain both 1517 * DDL and DML without requiring external metadata. 1518 */ 1519 private void collectBatchMetadata() { 1520 if (sqlStatements == null || sqlStatements.size() == 0) { 1521 return; 1522 } 1523 1524 EDbVendor vendor = config != null ? 
config.getVendor() : EDbVendor.dbvmssql; 1525 BatchMetadataCollector collector = new BatchMetadataCollector(sqlStatements, vendor); 1526 TSQLEnv batchEnv = collector.collect(); 1527 1528 if (batchEnv != null) { 1529 setSqlEnv(batchEnv); 1530 logDebug("Collected batch-local DDL metadata into TSQLEnv"); 1531 } 1532 } 1533 1534 /** 1535 * Delta 4: Track database context from USE/SET statements. 1536 * 1537 * Scans the statement list for USE DATABASE, USE SCHEMA, SET SCHEMA, 1538 * and similar statements, and applies the context to TSQLEnv for 1539 * proper resolution of unqualified table names. 1540 */ 1541 private void trackDatabaseContext() { 1542 if (sqlStatements == null || sqlStatements.size() == 0) { 1543 return; 1544 } 1545 1546 DatabaseContextTracker tracker = new DatabaseContextTracker(); 1547 tracker.processStatements(sqlStatements); 1548 1549 // Apply context to TSQLEnv if any context was found 1550 if (tracker.hasContext()) { 1551 TSQLEnv env = getSqlEnv(); 1552 if (env != null) { 1553 tracker.applyDefaults(env); 1554 logDebug("Applied database context: " + tracker); 1555 } else { 1556 // Create a minimal TSQLEnv if none exists 1557 EDbVendor vendor = config != null ? config.getVendor() : EDbVendor.dbvmssql; 1558 try { 1559 env = new TSQLEnv(vendor) { 1560 @Override 1561 public void initSQLEnv() { 1562 // Minimal initialization 1563 } 1564 }; 1565 tracker.applyDefaults(env); 1566 setSqlEnv(env); 1567 logDebug("Created minimal TSQLEnv with database context: " + tracker); 1568 } catch (Exception e) { 1569 // TSQLEnv creation failed - context will not be applied 1570 logDebug("Failed to create TSQLEnv for database context: " + e.getMessage()); 1571 } 1572 } 1573 } 1574 } 1575 1576 /** 1577 * Build the global scope 1578 */ 1579 private void buildGlobalScope() { 1580 logDebug("Building global scope"); 1581 1582 // Get SQLEnv and vendor for qualified name resolution 1583 TSQLEnv sqlEnv = globalContext != null ? 
globalContext.getSqlEnv() : null; 1584 EDbVendor vendor = EDbVendor.dbvoracle; // Default 1585 1586 // Try to get vendor from statements 1587 if (sqlStatements != null && sqlStatements.size() > 0) { 1588 vendor = sqlStatements.get(0).dbvendor; 1589 } 1590 1591 // Create global scope with sqlEnv and vendor for proper qualified name resolution 1592 globalScope = new GlobalScope(globalContext, config.getNameMatcher(), sqlEnv, vendor); 1593 1594 logDebug("GlobalScope created with defaults: catalog=" + 1595 globalScope.getDefaultCatalog() + ", schema=" + globalScope.getDefaultSchema()); 1596 } 1597 1598 /** 1599 * Process a single statement 1600 */ 1601 private void processStatement(Object statement) { 1602 if (statement instanceof TSelectSqlStatement) { 1603 processSelectStatement((TSelectSqlStatement) statement); 1604 } 1605 // TODO: Add support for INSERT, UPDATE, DELETE, etc. 1606 } 1607 1608 /** 1609 * Process a SELECT statement 1610 */ 1611 private void processSelectStatement(TSelectSqlStatement select) { 1612 processSelectStatement(select, globalScope); 1613 } 1614 1615 /** 1616 * Process a SELECT statement with a specific parent scope. 1617 * This is used for recursive processing of CTE subqueries. 
1618 */ 1619 private void processSelectStatement(TSelectSqlStatement select, IScope givenParentScope) { 1620 logDebug("Processing SELECT statement"); 1621 1622 // Create SELECT scope (will be child of CTE scope if CTEs exist, otherwise child of given parent scope) 1623 IScope parentScope = givenParentScope; 1624 1625 // Process CTEs (WITH clause) if present 1626 CTEScope cteScope = null; 1627 if (select.getCteList() != null && select.getCteList().size() > 0) { 1628 cteScope = processCTEs(select.getCteList(), givenParentScope); 1629 parentScope = cteScope; // CTEs become parent of SELECT 1630 } 1631 1632 SelectScope selectScope = new SelectScope(parentScope, select); 1633 1634 // Process FROM clause 1635 if (select.tables != null && select.tables.size() > 0) { 1636 FromScope fromScope = processFromClause(select, selectScope); 1637 selectScope.setFromScope(fromScope); 1638 } 1639 1640 // Process column references in SELECT list 1641 if (select.getResultColumnList() != null) { 1642 List<TObjectName> selectListColumns = collectObjectNamesFromResultColumns(select.getResultColumnList()); 1643 processColumnReferences(selectListColumns, selectScope); 1644 } 1645 1646 // Process WHERE clause 1647 if (select.getWhereClause() != null && 1648 select.getWhereClause().getCondition() != null) { 1649 List<TObjectName> whereColumns = select.getWhereClause().getCondition().getColumnsInsideExpression(); 1650 processColumnReferences(whereColumns, selectScope); 1651 } 1652 1653 // Process GROUP BY clause 1654 GroupByScope groupByScope = null; 1655 if (select.getGroupByClause() != null) { 1656 groupByScope = processGroupBy(select, selectScope); 1657 } 1658 1659 // Process HAVING clause 1660 if (select.getGroupByClause() != null && 1661 select.getGroupByClause().getHavingClause() != null) { 1662 processHaving(select, selectScope, groupByScope); 1663 } 1664 1665 // Process ORDER BY clause 1666 if (select.getOrderbyClause() != null) { 1667 processOrderBy(select, selectScope); 1668 } 1669 } 
1670 1671 /** 1672 * Process FROM clause and build FROM scope 1673 */ 1674 private FromScope processFromClause(TSelectSqlStatement select, IScope parentScope) { 1675 FromScope fromScope = new FromScope(parentScope, select.tables); 1676 1677 // Process each relation (table or join) 1678 ArrayList<TTable> relations = select.getRelations(); 1679 if (relations != null) { 1680 for (TTable table : relations) { 1681 processTableOrJoin(table, fromScope); 1682 } 1683 } 1684 1685 return fromScope; 1686 } 1687 1688 /** 1689 * Recursively process a table or join expression and add to FROM scope 1690 */ 1691 private void processTableOrJoin(TTable table, FromScope fromScope) { 1692 if (table.getTableType() == ETableSource.join) { 1693 // This is a JOIN - recursively process left and right tables 1694 TJoinExpr joinExpr = table.getJoinExpr(); 1695 if (joinExpr != null) { 1696 logDebug("Processing JOIN: " + joinExpr.getJointype()); 1697 1698 // Recursively process left table 1699 TTable leftTable = joinExpr.getLeftTable(); 1700 if (leftTable != null) { 1701 processTableOrJoin(leftTable, fromScope); 1702 } 1703 1704 // Recursively process right table 1705 TTable rightTable = joinExpr.getRightTable(); 1706 if (rightTable != null) { 1707 processTableOrJoin(rightTable, fromScope); 1708 } 1709 1710 // TODO: Create JoinScope to handle nullable semantics 1711 // For now, we just add the base tables to FROM scope 1712 } 1713 } else { 1714 // This is a base table (objectname, subquery, etc.) 1715 INamespace namespace = createNamespaceForTable(table); 1716 1717 // Validate namespace (load metadata) 1718 namespace.validate(); 1719 1720 // Determine alias 1721 String alias = table.getAliasName() != null 1722 ? 
table.getAliasName() 1723 : table.getName(); 1724 1725 // Add to FROM scope 1726 fromScope.addChild(namespace, alias, false); 1727 1728 logDebug("Added table to FROM scope: " + alias); 1729 } 1730 } 1731 1732 /** 1733 * Process CTEs (WITH clause) and build CTE scope 1734 */ 1735 private CTEScope processCTEs(TCTEList cteList, IScope parentScope) { 1736 CTEScope cteScope = new CTEScope(parentScope, cteList); 1737 logDebug("Processing WITH clause with " + cteList.size() + " CTE(s)"); 1738 1739 // Process each CTE in order (later CTEs can reference earlier ones) 1740 for (int i = 0; i < cteList.size(); i++) { 1741 TCTE cte = cteList.getCTE(i); 1742 1743 // Get CTE name 1744 String cteName = cte.getTableName() != null ? cte.getTableName().toString() : null; 1745 if (cteName == null) { 1746 logDebug("Skipping CTE with null name"); 1747 continue; 1748 } 1749 1750 // Get CTE subquery 1751 TSelectSqlStatement cteSubquery = cte.getSubquery(); 1752 if (cteSubquery == null) { 1753 logDebug("Skipping CTE '" + cteName + "' with null subquery"); 1754 continue; 1755 } 1756 1757 // Create CTENamespace 1758 CTENamespace cteNamespace = new CTENamespace( 1759 cte, 1760 cteName, 1761 cteSubquery, 1762 config.getNameMatcher() 1763 ); 1764 1765 // Validate namespace (load column metadata from subquery) 1766 cteNamespace.validate(); 1767 1768 // Add to CTE scope (makes it visible to later CTEs and main query) 1769 cteScope.addCTE(cteName, cteNamespace); 1770 1771 logDebug("Added CTE to scope: " + cteName + 1772 " (columns=" + cteNamespace.getExplicitColumns().size() + 1773 ", recursive=" + cteNamespace.isRecursive() + ")"); 1774 1775 // Recursively process CTE subquery 1776 // This ensures that: 1777 // 1. Columns within the CTE are properly resolved 1778 // 2. Nested CTEs within this CTE are handled 1779 // 3. 
Later CTEs can reference this CTE's columns 1780 logDebug("Recursively processing CTE subquery: " + cteName); 1781 processSelectStatement(cteSubquery, cteScope); 1782 } 1783 1784 return cteScope; 1785 } 1786 1787 /** 1788 * Process GROUP BY clause and build GROUP BY scope 1789 */ 1790 private GroupByScope processGroupBy(TSelectSqlStatement select, SelectScope selectScope) { 1791 GroupByScope groupByScope = new GroupByScope(selectScope, select.getGroupByClause()); 1792 logDebug("Processing GROUP BY clause"); 1793 1794 // Set the FROM scope for column resolution 1795 if (selectScope.getFromScope() != null) { 1796 groupByScope.setFromScope(selectScope.getFromScope()); 1797 } 1798 1799 // Process column references in GROUP BY items 1800 if (select.getGroupByClause().getItems() != null) { 1801 for (int i = 0; i < select.getGroupByClause().getItems().size(); i++) { 1802 gudusoft.gsqlparser.nodes.TGroupByItem item = select.getGroupByClause().getItems().getGroupByItem(i); 1803 if (item.getExpr() != null) { 1804 List<TObjectName> groupByColumns = item.getExpr().getColumnsInsideExpression(); 1805 processColumnReferences(groupByColumns, groupByScope); 1806 } 1807 } 1808 } 1809 1810 return groupByScope; 1811 } 1812 1813 /** 1814 * Process HAVING clause and build HAVING scope 1815 */ 1816 private void processHaving(TSelectSqlStatement select, SelectScope selectScope, GroupByScope groupByScope) { 1817 logDebug("Processing HAVING clause"); 1818 1819 HavingScope havingScope = new HavingScope( 1820 selectScope, 1821 select.getGroupByClause().getHavingClause() 1822 ); 1823 1824 // Set GROUP BY scope for grouped column resolution 1825 if (groupByScope != null) { 1826 havingScope.setGroupByScope(groupByScope); 1827 } 1828 1829 // Set SELECT scope for alias resolution 1830 havingScope.setSelectScope(selectScope); 1831 1832 // Process column references in HAVING condition 1833 List<TObjectName> havingColumns = select.getGroupByClause().getHavingClause().getColumnsInsideExpression(); 
1834 processColumnReferences(havingColumns, havingScope); 1835 } 1836 1837 /** 1838 * Process ORDER BY clause and build ORDER BY scope 1839 */ 1840 private void processOrderBy(TSelectSqlStatement select, SelectScope selectScope) { 1841 logDebug("Processing ORDER BY clause"); 1842 1843 OrderByScope orderByScope = new OrderByScope(selectScope, select.getOrderbyClause()); 1844 1845 // Set SELECT scope for alias resolution 1846 orderByScope.setSelectScope(selectScope); 1847 1848 // Set FROM scope for direct column resolution (database-dependent) 1849 if (selectScope.getFromScope() != null) { 1850 orderByScope.setFromScope(selectScope.getFromScope()); 1851 } 1852 1853 // Process column references in ORDER BY items 1854 if (select.getOrderbyClause().getItems() != null) { 1855 for (int i = 0; i < select.getOrderbyClause().getItems().size(); i++) { 1856 gudusoft.gsqlparser.nodes.TOrderByItem item = select.getOrderbyClause().getItems().getOrderByItem(i); 1857 if (item.getSortKey() != null) { 1858 List<TObjectName> orderByColumns = item.getSortKey().getColumnsInsideExpression(); 1859 processColumnReferences(orderByColumns, orderByScope); 1860 } 1861 } 1862 } 1863 } 1864 1865 /** 1866 * Create appropriate namespace for a table 1867 */ 1868 private INamespace createNamespaceForTable(TTable table) { 1869 // Check if it's a subquery 1870 if (table.getSubquery() != null) { 1871 return new SubqueryNamespace( 1872 table.getSubquery(), 1873 table.getAliasName(), 1874 config.getNameMatcher() 1875 ); 1876 } 1877 1878 // Regular table - pass sqlEnv and vendor for qualified name resolution 1879 TSQLEnv sqlEnv = globalContext != null ? globalContext.getSqlEnv() : null; 1880 EDbVendor vendor = table.dbvendor != null ? 
table.dbvendor : EDbVendor.dbvoracle; 1881 return new TableNamespace(table, config.getNameMatcher(), sqlEnv, vendor); 1882 } 1883 1884 /** 1885 * Collect all TObjectName from TResultColumnList 1886 */ 1887 private List<TObjectName> collectObjectNamesFromResultColumns( 1888 gudusoft.gsqlparser.nodes.TResultColumnList resultColumns) { 1889 List<TObjectName> objNames = new ArrayList<>(); 1890 1891 for (int i = 0; i < resultColumns.size(); i++) { 1892 gudusoft.gsqlparser.nodes.TResultColumn rc = resultColumns.getResultColumn(i); 1893 if (rc.getExpr() != null) { 1894 // Get all column references from the expression 1895 List<TObjectName> exprColumns = rc.getExpr().getColumnsInsideExpression(); 1896 if (exprColumns != null) { 1897 objNames.addAll(exprColumns); 1898 } 1899 } 1900 } 1901 1902 return objNames; 1903 } 1904 1905 /** 1906 * Process column references (TObjectName list) 1907 */ 1908 private void processColumnReferences(List<TObjectName> objectNames, IScope scope) { 1909 if (objectNames == null) return; 1910 1911 for (TObjectName objName : objectNames) { 1912 // Record column-to-scope mapping for iterative resolution (Principle 1) 1913 columnToScopeMap.put(objName, scope); 1914 allColumnReferences.add(objName); 1915 1916 // Resolve the column reference 1917 nameResolver.resolve(objName, scope); 1918 1919 // Handle USING column priority for JOIN...USING syntax 1920 handleUsingColumnResolution(objName); 1921 1922 // Handle Teradata NAMED alias resolution 1923 handleTeradataNamedAliasResolution(objName); 1924 handleQualifyClauseAliasResolution(objName); 1925 } 1926 } 1927 1928 // Detailed legacy sync timing (for profiling) 1929 private static long globalTimeClearLinked = 0; 1930 private static long globalTimeFillAttributes = 0; 1931 private static long globalTimeSyncColumns = 0; 1932 private static long globalTimePopulateOrphans = 0; 1933 private static long globalTimeClearHints = 0; 1934 1935 /** 1936 * Get detailed legacy sync timing breakdown. 
1937 */ 1938 public static String getLegacySyncTimings() { 1939 long total = globalTimeClearLinked + globalTimeFillAttributes + globalTimeSyncColumns + globalTimePopulateOrphans + globalTimeClearHints; 1940 return String.format( 1941 "LegacySync Breakdown:\n" + 1942 " ClearLinkedColumns: %d ms (%.1f%%)\n" + 1943 " FillTableAttributes: %d ms (%.1f%%)\n" + 1944 " SyncColumnToLegacy: %d ms (%.1f%%)\n" + 1945 " PopulateOrphanColumns: %d ms (%.1f%%)\n" + 1946 " ClearSyntaxHints: %d ms (%.1f%%)\n" + 1947 " Total: %d ms", 1948 globalTimeClearLinked, total > 0 ? 100.0 * globalTimeClearLinked / total : 0, 1949 globalTimeFillAttributes, total > 0 ? 100.0 * globalTimeFillAttributes / total : 0, 1950 globalTimeSyncColumns, total > 0 ? 100.0 * globalTimeSyncColumns / total : 0, 1951 globalTimePopulateOrphans, total > 0 ? 100.0 * globalTimePopulateOrphans / total : 0, 1952 globalTimeClearHints, total > 0 ? 100.0 * globalTimeClearHints / total : 0, 1953 total); 1954 } 1955 1956 /** 1957 * Create cloned columns for star column tracing. 1958 * 1959 * <p>This is a CORE part of TSQLResolver2's name resolution. When a column traces 1960 * through a CTE or subquery with SELECT * to a physical table, we create a cloned 1961 * TObjectName with sourceTable pointing to the traced physical table. 1962 * 1963 * <p>Example: 1964 * <pre> 1965 * WITH cte AS (SELECT * FROM physical_table) 1966 * SELECT a FROM cte 1967 * </pre> 1968 * 1969 * <p>For column 'a' in the outer SELECT: 1970 * <ul> 1971 * <li>Original column: sourceTable = cte (immediate source)</li> 1972 * <li>Cloned column: sourceTable = physical_table (traced through star)</li> 1973 * </ul> 1974 * 1975 * <p>Both columns are added to allColumnReferences for complete lineage tracking. 1976 * This ensures the formatter can output both the immediate source and the traced 1977 * physical table when needed. 
1978 */ 1979 private void createTracedColumnClones() { 1980 // Collect clones to add (avoid ConcurrentModificationException) 1981 java.util.List<TObjectName> clonesToAdd = new java.util.ArrayList<>(); 1982 1983 // Build HashSet of existing (sourceTable identity, columnNameLower) pairs for O(1) dedup 1984 java.util.Set<String> existingKeys = new java.util.HashSet<>(); 1985 for (TObjectName existing : allColumnReferences) { 1986 if (existing.getSourceTable() != null) { 1987 String existingColName = existing.getColumnNameOnly(); 1988 if (existingColName != null) { 1989 String key = System.identityHashCode(existing.getSourceTable()) 1990 + ":" + existingColName.toLowerCase(); 1991 existingKeys.add(key); 1992 } 1993 } 1994 } 1995 1996 for (TObjectName column : allColumnReferences) { 1997 // Skip star columns - they represent all columns from a table and shouldn't be cloned 1998 String colName = column.getColumnNameOnly(); 1999 if (colName != null && colName.equals("*")) { 2000 continue; 2001 } 2002 2003 // Skip columns without resolution 2004 gudusoft.gsqlparser.resolver2.model.ResolutionResult resolution = column.getResolution(); 2005 if (resolution == null || !resolution.isExactMatch()) { 2006 continue; 2007 } 2008 2009 gudusoft.gsqlparser.resolver2.model.ColumnSource source = resolution.getColumnSource(); 2010 if (source == null) { 2011 continue; 2012 } 2013 2014 TTable sourceTable = column.getSourceTable(); 2015 if (sourceTable == null) { 2016 continue; 2017 } 2018 2019 // Only process CTE or subquery columns 2020 if (!sourceTable.isCTEName() && sourceTable.getTableType() != ETableSource.subquery) { 2021 continue; 2022 } 2023 2024 // Get the traced physical table 2025 TTable finalTable = source.getFinalTable(); 2026 if (finalTable == null || finalTable == sourceTable) { 2027 continue; 2028 } 2029 2030 // Skip if finalTable is also a CTE or subquery 2031 if (finalTable.isCTEName() || finalTable.getTableType() == ETableSource.subquery) { 2032 continue; 2033 } 2034 
2035 // Skip subquery columns when the column matches an explicit column in the subquery's 2036 // SELECT list. Cloning is only needed when tracing through star columns. 2037 // For example, in "SELECT al1.COL1, al1.COL3 FROM (SELECT t1.COL1, t2.* FROM t1, t2) al1": 2038 // - al1.COL1 matches explicit "t1.COL1" -> don't clone (stays at subquery level) 2039 // - al1.COL3 doesn't match explicit column, must come from t2.* -> clone to t2 2040 if (sourceTable.getTableType() == ETableSource.subquery) { 2041 TSelectSqlStatement subquery = sourceTable.getSubquery(); 2042 if (subquery != null && subqueryHasExplicitColumn(subquery, colName)) { 2043 continue; 2044 } 2045 } 2046 2047 // Skip UNION scenarios - syncToLegacyStructures already handles linking to all 2048 // UNION branch tables via getAllFinalTables(). Creating clones would cause duplicates. 2049 java.util.List<TTable> allFinalTables = source.getAllFinalTables(); 2050 if (allFinalTables != null && allFinalTables.size() > 1) { 2051 continue; 2052 } 2053 2054 // Skip UNQUALIFIED join condition columns - they should not be traced to the source 2055 // subquery's underlying table via star column expansion. 2056 // This is particularly important for MERGE ON clause columns which may 2057 // belong to the target table rather than the source subquery. 2058 // QUALIFIED columns (like S.id) should still be traced as they explicitly reference 2059 // the source subquery. 2060 // Note: We check location only because ownStmt may be null for unresolved columns. 
2061 if (column.getLocation() == ESqlClause.joinCondition 2062 && (column.getTableString() == null || column.getTableString().isEmpty())) { 2063 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2064 logInfo("createTracedColumnClones: Skipping unqualified join condition column " + column.toString() + 2065 " - should not be traced to subquery's underlying table"); 2066 } 2067 continue; 2068 } 2069 2070 // O(1) dedup check using HashSet instead of O(n) linear scan 2071 String dedupeKey = System.identityHashCode(finalTable) 2072 + ":" + (colName != null ? colName.toLowerCase() : ""); 2073 if (existingKeys.add(dedupeKey)) { 2074 // Clone the column and set sourceTable to the traced physical table 2075 TObjectName clonedColumn = column.clone(); 2076 clonedColumn.setSourceTable(finalTable); 2077 clonesToAdd.add(clonedColumn); 2078 2079 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2080 logInfo("createTracedColumnClones: Cloned column " + column.toString() + 2081 " with sourceTable traced from " + sourceTable.getTableName() + 2082 " to physical table " + finalTable.getTableName()); 2083 } 2084 } 2085 } 2086 2087 // Add all clones to allColumnReferences (local copy in TSQLResolver2) 2088 allColumnReferences.addAll(clonesToAdd); 2089 2090 // Also add to scopeBuildResult so consumers using scopeBuildResult.getAllColumnReferences() 2091 // (like TestGetTableColumn2 for star column expansion tests) can see the clones 2092 if (scopeBuildResult != null && !clonesToAdd.isEmpty()) { 2093 scopeBuildResult.addColumnReferences(clonesToAdd); 2094 } 2095 2096 // S3: tag every clone as SYNTHETIC_STAR_CLONE so the binding post-pass 2097 // (S5+) can skip them. Clones bypass NameResolver.resolve() entirely 2098 // (see comment block above) — without this tag the post-pass would see 2099 // a TObjectName in getAllColumnReferences() with no recorded 2100 // ResolutionResult and no skip reason, breaking the coverage invariant. 
2101 if (resolutionContext.isBindingTraceEnabled() && !clonesToAdd.isEmpty()) { 2102 for (TObjectName clone : clonesToAdd) { 2103 resolutionContext.recordColumnSkipReason( 2104 clone, 2105 gudusoft.gsqlparser.resolver2.binding.BindingSkipReason.SYNTHETIC_STAR_CLONE); 2106 } 2107 } 2108 2109 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && !clonesToAdd.isEmpty()) { 2110 logInfo("createTracedColumnClones: Created " + clonesToAdd.size() + " traced column clones"); 2111 } 2112 } 2113 2114 /** 2115 * Sync results to legacy structures for backward compatibility. 2116 * This populates: 2117 * - TTable.linkedColumns: columns resolved to this table 2118 * - TObjectName.sourceTable: already set in setResolution() 2119 * - TObjectName.linkedColumnDef: from ColumnSource.definitionNode 2120 * - TObjectName.sourceColumn: from ColumnSource.definitionNode (if TResultColumn) 2121 */ 2122 private void syncToLegacyStructures() { 2123 if (!config.isLegacyCompatibilityEnabled()) { 2124 logInfo("Legacy compatibility disabled, skipping sync"); 2125 return; 2126 } 2127 2128 for (int i = 0; i < sqlStatements.size(); i++) { 2129 } 2130 2131 logInfo("Syncing to legacy structures..."); 2132 2133 long phaseStart; 2134 2135 // Clear existing linkedColumns on all tables 2136 phaseStart = System.currentTimeMillis(); 2137 clearAllLinkedColumns(); 2138 2139 // Clear existing orphanColumns on all statements 2140 // These will be repopulated in Phase 4b based on TSQLResolver2 resolution 2141 for (int i = 0; i < sqlStatements.size(); i++) { 2142 clearOrphanColumnsRecursive(sqlStatements.get(i)); 2143 } 2144 globalTimeClearLinked += System.currentTimeMillis() - phaseStart; 2145 2146 // Phase 1: Fill TTable.getAttributes() for all tables 2147 // This uses the namespace data already collected during resolution 2148 phaseStart = System.currentTimeMillis(); 2149 Set<TTable> processedTables = new HashSet<>(); 2150 for (int i = 0; i < sqlStatements.size(); i++) { 2151 
fillTableAttributesRecursive(sqlStatements.get(i), processedTables); 2152 } 2153 globalTimeFillAttributes += System.currentTimeMillis() - phaseStart; 2154 logInfo("Filled attributes for " + processedTables.size() + " tables"); 2155 2156 // Phase 2: Iterate through all column references and sync to legacy structures 2157 phaseStart = System.currentTimeMillis(); 2158 int syncCount = 0; 2159 for (TObjectName column : allColumnReferences) { 2160 if (syncColumnToLegacy(column)) { 2161 syncCount++; 2162 } 2163 } 2164 globalTimeSyncColumns += System.currentTimeMillis() - phaseStart; 2165 2166 // Phase 3: Link CTAS target table columns 2167 // For CREATE TABLE AS SELECT, the SELECT list columns should be linked to the target table 2168 for (int i = 0; i < sqlStatements.size(); i++) { 2169 linkCTASTargetTableColumns(sqlStatements.get(i)); 2170 } 2171 2172 // Phase 4: Sync implicit database/schema from USE DATABASE/USE SCHEMA to AST 2173 // This enables TObjectName.getAnsiSchemaName() and getAnsiCatalogName() to work correctly 2174 syncImplicitDbSchemaToAST(); 2175 2176 // Phase 4b: Populate orphan columns 2177 // Columns with sourceTable=null (unresolved or ambiguous) should be added to 2178 // their containing statement's orphanColumns list. This enables TGetTableColumn 2179 // to report them as orphan columns (with linkOrphanColumnToFirstTable option). 2180 phaseStart = System.currentTimeMillis(); 2181 populateOrphanColumns(); 2182 globalTimePopulateOrphans += System.currentTimeMillis() - phaseStart; 2183 2184 // Phase 4c: Expand star columns using push-down inferred columns 2185 // For SELECT * and SELECT table.*, expand to individual columns based on: 2186 // 1. Inferred columns from the namespace (via push-down algorithm) 2187 // 2. 
This enables star column expansion without TSQLEnv metadata 2188 phaseStart = System.currentTimeMillis(); 2189 expandStarColumnsUsingPushDown(); 2190 long expandTime = System.currentTimeMillis() - phaseStart; 2191 logInfo("Star column expansion took " + expandTime + "ms"); 2192 2193 // Phase 5: Clear orphan column syntax hints for resolved columns 2194 // The old resolver adds "sphint" (syntax hint) warnings for columns that can't be resolved. 2195 // TSQLResolver2 resolves these columns but doesn't clear the syntax hints. 2196 // This phase cleans up those hints to maintain compatibility with tests expecting no hints. 2197 phaseStart = System.currentTimeMillis(); 2198 clearOrphanColumnSyntaxHints(); 2199 globalTimeClearHints += System.currentTimeMillis() - phaseStart; 2200 2201 logInfo("Legacy sync complete: " + syncCount + "/" + allColumnReferences.size() + " columns synced"); 2202 } 2203 2204 /** 2205 * Link SELECT list columns to CTAS target table. 2206 * For CREATE TABLE AS SELECT statements, the output column names (aliases) 2207 * should be linked to the target table. The source column references 2208 * remain linked to their source tables. 2209 * 2210 * NOTE: For CTAS, the parser (TCreateTableSqlStatement.doParseStatement) already 2211 * correctly creates and links alias columns to the target table. The source columns 2212 * that were incorrectly added are filtered out in clearLinkedColumnsRecursive(). 2213 * This method now only handles cases where the parser didn't create alias columns. 2214 */ 2215 private void linkCTASTargetTableColumns(TCustomSqlStatement stmt) { 2216 if (stmt == null) return; 2217 2218 // CTAS columns are already handled by the parser (TCreateTableSqlStatement.doParseStatement) 2219 // and incorrectly added source columns are filtered in clearLinkedColumnsRecursive(). 2220 // No additional processing needed here for CTAS. 
2221 2222 // Process nested statements (for other statement types that might need CTAS handling) 2223 for (int i = 0; i < stmt.getStatements().size(); i++) { 2224 linkCTASTargetTableColumns(stmt.getStatements().get(i)); 2225 } 2226 } 2227 2228 /** 2229 * Populate orphanColumns for unresolved columns. 2230 * Columns with sourceTable=null should be added to their containing statement's orphanColumns. 2231 * This enables TGetTableColumn to report these as "missed" columns. 2232 */ 2233 private void populateOrphanColumns() { 2234 int addedCount = 0; 2235 for (TObjectName column : allColumnReferences) { 2236 if (column == null) continue; 2237 2238 // Skip non-column types that should not be in orphan columns 2239 EDbObjectType dbObjectType = column.getDbObjectType(); 2240 if (dbObjectType == EDbObjectType.column_alias // alias clause column definitions (e.g., AS x (numbers, animals)) 2241 || dbObjectType == EDbObjectType.variable // stored procedure variables 2242 || dbObjectType == EDbObjectType.parameter // stored procedure parameters 2243 || dbObjectType == EDbObjectType.cursor // cursors 2244 || dbObjectType == EDbObjectType.constant // constants 2245 || dbObjectType == EDbObjectType.label // labels 2246 ) { 2247 continue; 2248 } 2249 2250 // Check resolution status directly - ambiguous columns should be added to orphanColumns 2251 // Note: column.getColumnSource() returns the first candidate for ambiguous columns, 2252 // which would cause them to be incorrectly skipped. We need to check the resolution status first. 2253 // IMPORTANT: This check must come BEFORE the sourceTable check because Phase 1 (linkColumnToTable) 2254 // might have already set sourceTable during parsing, but TSQLResolver2 correctly marked it as ambiguous. 
2255 // NOTE: Skip star columns (*) since they are handled specially via sourceTableList 2256 ResolutionResult resolution = column.getResolution(); 2257 String columnName = column.getColumnNameOnly(); 2258 boolean isStarColumn = columnName != null && columnName.equals("*"); 2259 2260 if (resolution != null && resolution.getStatus() == ResolutionStatus.AMBIGUOUS && !isStarColumn) { 2261 // Ambiguous columns should be added to orphanColumns so they appear as "missed" 2262 // Clear sourceTable if it was set by Phase 1 (linkColumnToTable) so the column 2263 // doesn't also appear as resolved in the output 2264 if (column.getSourceTable() != null) { 2265 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2266 logInfo("populateOrphanColumns: Clearing sourceTable for AMBIGUOUS column: " + column.toString() 2267 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2268 + " was linked to " + column.getSourceTable().getTableName() 2269 + " with " + (resolution.getAmbiguousSource() != null ? 2270 resolution.getAmbiguousSource().getCandidateCount() : 0) + " candidates"); 2271 } 2272 column.setSourceTable(null); 2273 } 2274 // Fall through to add to orphanColumns 2275 } else { 2276 // Star columns (*) should NEVER be orphan columns - they represent all columns 2277 // from all tables and are handled specially via sourceTableList and linked 2278 // to tables in syncColumnToLegacy() which runs after this phase. 
2279 if (isStarColumn) { 2280 continue; 2281 } 2282 2283 // For non-ambiguous columns, skip if they have a sourceTable 2284 if (column.getSourceTable() != null) { 2285 continue; 2286 } 2287 2288 // Also skip columns that have a ColumnSource with a valid table 2289 ColumnSource source = column.getColumnSource(); 2290 if (source != null) { 2291 TTable finalTable = source.getFinalTable(); 2292 if (finalTable != null) { 2293 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2294 logInfo("populateOrphanColumns: Skipping column with ColumnSource: " + column.toString() 2295 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2296 + " -> resolved to " + finalTable.getTableName()); 2297 } 2298 continue; 2299 } 2300 // Also check overrideTable for derived table columns 2301 TTable overrideTable = source.getOverrideTable(); 2302 if (overrideTable != null) { 2303 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2304 logInfo("populateOrphanColumns: Skipping column with ColumnSource (override): " + column.toString() 2305 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2306 + " -> resolved to " + overrideTable.getTableName()); 2307 } 2308 continue; 2309 } 2310 } 2311 } 2312 2313 // Debug: log columns being added to orphan 2314 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2315 ColumnSource debugSource = column.getColumnSource(); 2316 logInfo("populateOrphanColumns: Adding orphan column: " + column.toString() 2317 + " at (" + column.getLineNo() + "," + column.getColumnNo() + ")" 2318 + ", hasColumnSource=" + (debugSource != null) 2319 + (debugSource != null ? ", namespace=" + (debugSource.getSourceNamespace() != null ? 
2320 debugSource.getSourceNamespace().getClass().getSimpleName() : "null") : "")); 2321 } 2322 2323 // Find the containing statement for this column 2324 TCustomSqlStatement containingStmt = findContainingStatement(column); 2325 if (containingStmt != null) { 2326 // Set ownStmt so TSQLResolver2ResultFormatter can use getOwnStmt().getFirstPhysicalTable() 2327 // to link orphan columns to the first physical table (matching TGetTableColumn behavior) 2328 column.setOwnStmt(containingStmt); 2329 2330 TObjectNameList orphanColumns = containingStmt.getOrphanColumns(); 2331 if (orphanColumns != null && !containsColumn(orphanColumns, column)) { 2332 orphanColumns.addObjectName(column); 2333 addedCount++; 2334 } 2335 } else { 2336 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 2337 logInfo("Could not find containing statement for orphan column: " + column.toString()); 2338 } 2339 } 2340 } 2341 logInfo("Populated " + addedCount + " orphan columns"); 2342 } 2343 2344 /** 2345 * Find the statement that contains a column reference. 2346 * First tries to use the scope information (more reliable), then falls back to AST traversal. 2347 * For PL/SQL blocks, searches for the innermost DML statement that contains the column. 
2348 */ 2349 private TCustomSqlStatement findContainingStatement(TObjectName column) { 2350 // First, try to use the scope information from columnToScopeMap 2351 // The scope's node is typically the containing statement 2352 IScope scope = columnToScopeMap.get(column); 2353 if (scope != null) { 2354 TParseTreeNode scopeNode = scope.getNode(); 2355 if (scopeNode instanceof TCustomSqlStatement) { 2356 TCustomSqlStatement stmt = (TCustomSqlStatement) scopeNode; 2357 // If the scope is a PL/SQL block or procedure, search for DML statements within it 2358 // that actually contain the column (by line number) 2359 if (isPLSQLBlockStatement(stmt)) { 2360 TCustomSqlStatement dmlStmt = findDMLStatementContaining(stmt, column); 2361 if (dmlStmt != null) { 2362 return dmlStmt; 2363 } 2364 } 2365 return stmt; 2366 } 2367 } 2368 2369 // Fallback: traverse up the AST to find the nearest TCustomSqlStatement parent 2370 TParseTreeNode node = column; 2371 while (node != null) { 2372 if (node instanceof TCustomSqlStatement) { 2373 return (TCustomSqlStatement) node; 2374 } 2375 node = node.getParentObjectName(); 2376 } 2377 2378 // Last resort: search all statements for a DML statement containing the column 2379 TCustomSqlStatement result = null; 2380 if (sqlStatements.size() > 0) { 2381 for (int i = 0; i < sqlStatements.size(); i++) { 2382 TCustomSqlStatement stmt = sqlStatements.get(i); 2383 TCustomSqlStatement dmlStmt = findDMLStatementContaining(stmt, column); 2384 if (dmlStmt != null) { 2385 result = dmlStmt; 2386 break; 2387 } 2388 } 2389 if (result == null) { 2390 result = sqlStatements.get(0); 2391 } 2392 } 2393 return result; 2394 } 2395 2396 /** 2397 * Check if a statement is a PL/SQL block type statement. 
2398 */ 2399 private boolean isPLSQLBlockStatement(TCustomSqlStatement stmt) { 2400 if (stmt == null) return false; 2401 String className = stmt.getClass().getSimpleName(); 2402 return className.startsWith("TPlsql") || className.startsWith("TPLSql") || 2403 className.contains("Block") || className.contains("Procedure") || 2404 className.contains("Function") || className.contains("Package"); 2405 } 2406 2407 /** 2408 * DML Statement Range for efficient line-based lookup. 2409 * Used by the DML index cache (Performance Optimization A). 2410 */ 2411 private static class DmlRange implements Comparable<DmlRange> { 2412 final long startLine; 2413 final long endLine; 2414 final TCustomSqlStatement stmt; 2415 2416 DmlRange(TCustomSqlStatement stmt) { 2417 this.stmt = stmt; 2418 this.startLine = stmt.getStartToken() != null ? stmt.getStartToken().lineNo : -1; 2419 this.endLine = stmt.getEndToken() != null ? stmt.getEndToken().lineNo : -1; 2420 } 2421 2422 boolean contains(long line) { 2423 return startLine >= 0 && startLine <= line && line <= endLine; 2424 } 2425 2426 // Sort by startLine for binary search 2427 @Override 2428 public int compareTo(DmlRange other) { 2429 return Long.compare(this.startLine, other.startLine); 2430 } 2431 } 2432 2433 /** 2434 * Cache for DML statement ranges per parent statement (Performance Optimization A). 2435 * Built lazily on first access, cleared at start of each resolve() call. 2436 * Uses IdentityHashMap because we need object identity, not equals(). 2437 */ 2438 private final Map<TCustomSqlStatement, List<DmlRange>> dmlIndexCache = new IdentityHashMap<>(); 2439 2440 /** 2441 * Build DML index for a parent statement. 
2442 */ 2443 private List<DmlRange> buildDmlIndex(TCustomSqlStatement parent) { 2444 final List<DmlRange> ranges = new ArrayList<>(); 2445 parent.acceptChildren(new TParseTreeVisitor() { 2446 @Override 2447 public void preVisit(TInsertSqlStatement stmt) { 2448 ranges.add(new DmlRange(stmt)); 2449 } 2450 @Override 2451 public void preVisit(TUpdateSqlStatement stmt) { 2452 ranges.add(new DmlRange(stmt)); 2453 } 2454 @Override 2455 public void preVisit(TDeleteSqlStatement stmt) { 2456 ranges.add(new DmlRange(stmt)); 2457 } 2458 @Override 2459 public void preVisit(TSelectSqlStatement stmt) { 2460 ranges.add(new DmlRange(stmt)); 2461 } 2462 }); 2463 // Sort by startLine for efficient lookup 2464 java.util.Collections.sort(ranges); 2465 return ranges; 2466 } 2467 2468 /** 2469 * Get or build the DML index for a parent statement (Performance Optimization A). 2470 */ 2471 private List<DmlRange> getDmlIndex(TCustomSqlStatement parent) { 2472 return dmlIndexCache.computeIfAbsent(parent, this::buildDmlIndex); 2473 } 2474 2475 /** 2476 * Find the innermost DML statement (INSERT/UPDATE/DELETE/SELECT) within a parent statement 2477 * that contains the given column reference (by line number range). 2478 * Uses cached DML index for O(log N) lookup instead of O(N) traversal. 
2479 */ 2480 private TCustomSqlStatement findDMLStatementContaining(TCustomSqlStatement parent, TObjectName column) { 2481 if (parent == null || column == null) return null; 2482 2483 long columnLine = column.getLineNo(); 2484 TCustomSqlStatement result = null; 2485 2486 // Use cached DML index (Performance Optimization A) 2487 List<DmlRange> ranges = getDmlIndex(parent); 2488 2489 // Find all DML statements that contain the column by line number 2490 // Need to check all ranges that could contain the column (can't use pure binary search 2491 // because ranges can overlap and we want the innermost one) 2492 for (DmlRange range : ranges) { 2493 // Optimization: if startLine > columnLine, no more ranges can contain it 2494 if (range.startLine > columnLine) { 2495 break; 2496 } 2497 if (range.contains(columnLine)) { 2498 // Found a matching DML statement - prefer the innermost one (later startLine) 2499 if (result == null || 2500 (range.startLine >= result.getStartToken().lineNo)) { 2501 result = range.stmt; 2502 } 2503 } 2504 } 2505 2506 return result; 2507 } 2508 2509 /** 2510 * Sync implicit database/schema from USE DATABASE/USE SCHEMA statements to AST. 2511 * This enables TObjectName.getAnsiSchemaName() and getAnsiCatalogName() to work correctly 2512 * for unqualified object names. 2513 * 2514 * This is similar to what TDatabaseObjectResolver does in the legacy resolver: 2515 * it visits all TObjectName nodes and sets implicitDatabaseName/implicitSchemaName 2516 * based on the current database/schema context. 
2517 */ 2518 private void syncImplicitDbSchemaToAST() { 2519 // Get the tracked database context 2520 TSQLEnv env = getSqlEnv(); 2521 if (env == null) { 2522 return; 2523 } 2524 2525 String defaultCatalog = env.getDefaultCatalogName(); 2526 String defaultSchema = env.getDefaultSchemaName(); 2527 2528 // If no defaults are set, nothing to sync 2529 if ((defaultCatalog == null || defaultCatalog.isEmpty()) && 2530 (defaultSchema == null || defaultSchema.isEmpty())) { 2531 return; 2532 } 2533 2534 logDebug("Syncing implicit DB/schema to AST: catalog=" + defaultCatalog + ", schema=" + defaultSchema); 2535 2536 // Visit all statements and set implicit names on TObjectName nodes 2537 for (int i = 0; i < sqlStatements.size(); i++) { 2538 TCustomSqlStatement stmt = sqlStatements.get(i); 2539 if (stmt != null) { 2540 stmt.acceptChildren(new ImplicitDbSchemaVisitor(defaultCatalog, defaultSchema)); 2541 } 2542 } 2543 } 2544 2545 /** 2546 * Visitor to set implicit database/schema on TObjectName nodes. 
2547 */ 2548 private static class ImplicitDbSchemaVisitor extends TParseTreeVisitor { 2549 private final String defaultCatalog; 2550 private final String defaultSchema; 2551 2552 public ImplicitDbSchemaVisitor(String defaultCatalog, String defaultSchema) { 2553 this.defaultCatalog = defaultCatalog; 2554 this.defaultSchema = defaultSchema; 2555 } 2556 2557 @Override 2558 public void preVisit(TObjectName node) { 2559 if (node == null) return; 2560 2561 // Skip column objects - they don't need implicit DB/schema 2562 if (node.getDbObjectType() == EDbObjectType.column) return; 2563 2564 // Skip objects with a db_link - they refer to remote databases 2565 // and should not inherit the current session's default schema/catalog 2566 if (node.getDblink() != null) return; 2567 2568 // Set default database name if not qualified 2569 if (defaultCatalog != null && !defaultCatalog.isEmpty() && node.getDatabaseToken() == null) { 2570 node.setImplictDatabaseName(defaultCatalog); 2571 } 2572 2573 // Set default schema name if not qualified 2574 if (defaultSchema != null && !defaultSchema.isEmpty() && node.getSchemaToken() == null) { 2575 node.setImplictSchemaName(defaultSchema); 2576 } 2577 } 2578 } 2579 2580 /** 2581 * Selectively clear orphan column syntax hints (sphint) based on TSQLResolver2 resolution. 2582 * 2583 * Phase 1 (linkColumnToTable during parsing) adds sphint hints for columns it can't resolve. 2584 * TSQLResolver2 should: 2585 * 1. KEEP sphint hints for columns that are in allColumnReferences with NOT_FOUND/AMBIGUOUS status 2586 * (these are genuinely orphan/ambiguous columns) 2587 * 2. CLEAR sphint hints for all other columns: 2588 * - Columns successfully resolved (EXACT_MATCH) 2589 * - Columns filtered out by ScopeBuilder (package constants, function keywords, etc.) 2590 * - Columns in contexts TSQLResolver2 doesn't collect (MERGE VALUES, etc.) 
2591 */ 2592 private void clearOrphanColumnSyntaxHints() { 2593 // Build a set of positions for columns that should KEEP their sphint hints 2594 // These are columns in allColumnReferences with NOT_FOUND or AMBIGUOUS status 2595 Set<String> orphanPositions = new HashSet<>(); 2596 2597 for (TObjectName col : allColumnReferences) { 2598 if (col == null) continue; 2599 gudusoft.gsqlparser.resolver2.model.ResolutionResult resolution = col.getResolution(); 2600 if (resolution != null) { 2601 ResolutionStatus status = resolution.getStatus(); 2602 // Only keep sphint for genuinely AMBIGUOUS columns 2603 // NOT_FOUND columns might be due to TSQLResolver2 scope issues (e.g., MERGE WHEN clause) 2604 // so we clear their sphint to match old resolver behavior 2605 if (status == ResolutionStatus.AMBIGUOUS) { 2606 TSourceToken startToken = col.getStartToken(); 2607 if (startToken != null) { 2608 String key = startToken.lineNo + ":" + startToken.columnNo; 2609 orphanPositions.add(key); 2610 } 2611 } 2612 } 2613 } 2614 2615 // Clear sphint hints for positions NOT in orphanPositions 2616 for (int i = 0; i < sqlStatements.size(); i++) { 2617 TCustomSqlStatement stmt = sqlStatements.get(i); 2618 if (stmt == null) continue; 2619 clearNonOrphanSphintHintsRecursive(stmt, orphanPositions); 2620 } 2621 } 2622 2623 /** 2624 * Recursively clear sphint hints except for genuinely orphan columns. 
2625 */ 2626 private void clearNonOrphanSphintHintsRecursive(TCustomSqlStatement stmt, Set<String> orphanPositions) { 2627 if (stmt == null) return; 2628 2629 // Clear sphint hints that are NOT for genuinely orphan columns 2630 if (stmt.getSyntaxHints() != null && stmt.getSyntaxHints().size() > 0) { 2631 for (int j = stmt.getSyntaxHints().size() - 1; j >= 0; j--) { 2632 TSyntaxError syntaxError = stmt.getSyntaxHints().get(j); 2633 if (syntaxError.errortype == EErrorType.sphint) { 2634 String key = syntaxError.lineNo + ":" + syntaxError.columnNo; 2635 if (!orphanPositions.contains(key)) { 2636 // This sphint is NOT for a genuinely orphan column - clear it 2637 stmt.getSyntaxHints().remove(j); 2638 logDebug("Cleared sphint at line " + syntaxError.lineNo); 2639 } 2640 // Keep sphint hints for genuinely orphan columns (in orphanPositions) 2641 } 2642 } 2643 } 2644 2645 // Note: orphanColumns is populated by populateOrphanColumns() in Phase 4b 2646 // DO NOT clear it here - TGetTableColumn relies on orphanColumns for 2647 // linkOrphanColumnToFirstTable functionality 2648 2649 // Process nested statements 2650 for (int k = 0; k < stmt.getStatements().size(); k++) { 2651 clearNonOrphanSphintHintsRecursive(stmt.getStatements().get(k), orphanPositions); 2652 } 2653 } 2654 2655 2656 2657 /** 2658 * Filter UNNEST table's linkedColumns to keep only legitimate columns. 2659 * Phase 1 (linkColumnToTable) may incorrectly link external variables to UNNEST 2660 * when UNNEST is the only table in scope. This method removes such incorrect links. 2661 * 2662 * Legitimate columns for UNNEST: 2663 * - Implicit column: the alias (e.g., "arry_pair" from "UNNEST(...) 
AS arry_pair") 2664 * - WITH OFFSET column (e.g., "pos" from "WITH OFFSET AS pos") 2665 * - Derived struct field columns (from UNNEST of STRUCT arrays) 2666 */ 2667 private void filterUnnestLinkedColumns(TTable unnestTable) { 2668 if (unnestTable == null || unnestTable.getTableType() != ETableSource.unnest) { 2669 return; 2670 } 2671 2672 TObjectNameList linkedColumns = unnestTable.getLinkedColumns(); 2673 if (linkedColumns == null || linkedColumns.size() == 0) { 2674 return; 2675 } 2676 2677 // Build set of legitimate column names 2678 java.util.Set<String> legitimateNames = new java.util.HashSet<>(); 2679 2680 // 1. Implicit column (alias name) 2681 String aliasName = unnestTable.getAliasName(); 2682 if (aliasName != null && !aliasName.isEmpty()) { 2683 legitimateNames.add(aliasName.toUpperCase()); 2684 } 2685 2686 // 2. WITH OFFSET column 2687 TUnnestClause unnestClause = unnestTable.getUnnestClause(); 2688 if (unnestClause != null && unnestClause.getWithOffset() != null) { 2689 if (unnestClause.getWithOffsetAlais() != null && 2690 unnestClause.getWithOffsetAlais().getAliasName() != null) { 2691 legitimateNames.add(unnestClause.getWithOffsetAlais().getAliasName().toString().toUpperCase()); 2692 } else { 2693 legitimateNames.add("OFFSET"); 2694 } 2695 } 2696 2697 // 3. Derived struct field columns 2698 if (unnestClause != null && unnestClause.getDerivedColumnList() != null) { 2699 for (int i = 0; i < unnestClause.getDerivedColumnList().size(); i++) { 2700 TObjectName derivedCol = unnestClause.getDerivedColumnList().getObjectName(i); 2701 if (derivedCol != null) { 2702 legitimateNames.add(derivedCol.toString().toUpperCase()); 2703 } 2704 } 2705 } 2706 2707 // 4. Explicit alias columns (Presto/Trino syntax: UNNEST(...) 
AS t(col1, col2)) 2708 if (unnestTable.getAliasClause() != null && 2709 unnestTable.getAliasClause().getColumns() != null) { 2710 for (int i = 0; i < unnestTable.getAliasClause().getColumns().size(); i++) { 2711 TObjectName colName = unnestTable.getAliasClause().getColumns().getObjectName(i); 2712 if (colName != null) { 2713 legitimateNames.add(colName.toString().toUpperCase()); 2714 } 2715 } 2716 } 2717 2718 // Collect columns to keep 2719 java.util.List<TObjectName> toKeep = new java.util.ArrayList<>(); 2720 for (int i = 0; i < linkedColumns.size(); i++) { 2721 TObjectName col = linkedColumns.getObjectName(i); 2722 if (col != null) { 2723 String colName = col.getColumnNameOnly(); 2724 if (colName != null && legitimateNames.contains(colName.toUpperCase())) { 2725 toKeep.add(col); 2726 } 2727 } 2728 } 2729 2730 // Clear and re-add only legitimate columns 2731 linkedColumns.clear(); 2732 for (TObjectName col : toKeep) { 2733 linkedColumns.addObjectName(col); 2734 } 2735 } 2736 2737 /** 2738 * Clear linkedColumns on all tables in all statements. 2739 */ 2740 private void clearAllLinkedColumns() { 2741 // Use a set to track processed statements and avoid processing duplicates 2742 // This is important when processing subqueries within tables, as the same 2743 // subquery might be reachable from multiple paths 2744 java.util.Set<TCustomSqlStatement> processed = new java.util.HashSet<>(); 2745 for (int i = 0; i < sqlStatements.size(); i++) { 2746 clearLinkedColumnsRecursive(sqlStatements.get(i), processed); 2747 } 2748 } 2749 2750 /** 2751 * Recursively clear orphanColumns on statements. 2752 * These will be repopulated with genuinely unresolved columns in Phase 4b. 
2753 */ 2754 private void clearOrphanColumnsRecursive(TCustomSqlStatement stmt) { 2755 if (stmt == null) return; 2756 2757 if (stmt.getOrphanColumns() != null) { 2758 stmt.getOrphanColumns().clear(); 2759 } 2760 2761 // Process nested statements 2762 for (int i = 0; i < stmt.getStatements().size(); i++) { 2763 clearOrphanColumnsRecursive(stmt.getStatements().get(i)); 2764 } 2765 2766 // Also handle stored procedure/function body statements 2767 if (stmt instanceof gudusoft.gsqlparser.stmt.TStoredProcedureSqlStatement) { 2768 gudusoft.gsqlparser.stmt.TStoredProcedureSqlStatement sp = 2769 (gudusoft.gsqlparser.stmt.TStoredProcedureSqlStatement) stmt; 2770 for (int i = 0; i < sp.getBodyStatements().size(); i++) { 2771 clearOrphanColumnsRecursive(sp.getBodyStatements().get(i)); 2772 } 2773 } 2774 } 2775 2776 private void clearLinkedColumnsRecursive(TCustomSqlStatement stmt, java.util.Set<TCustomSqlStatement> processed) { 2777 if (stmt == null) return; 2778 2779 // Skip if already processed to avoid redundant work and potential infinite loops 2780 if (processed.contains(stmt)) { 2781 return; 2782 } 2783 processed.add(stmt); 2784 2785 // Skip DAX statements - they populate their own linkedColumns during parsing 2786 // via TDaxFunction.doParse() which calls psql.linkColumnToTable() directly. 2787 // TSQLResolver2's ScopeBuilder doesn't traverse DAX expressions, so we must 2788 // preserve the linkedColumns that DAX parsing already established. 2789 if (stmt instanceof TDaxStmt) { 2790 return; 2791 } 2792 2793 // Skip ALTER TABLE statements - they populate linkedColumns during parsing 2794 // via TAlterTableOption.doParse() which directly adds columns to the target table's 2795 // linkedColumns. TSQLResolver2's ScopeBuilder doesn't traverse these option nodes, 2796 // so we must preserve the linkedColumns that parsing already established. 
2797 if (stmt instanceof TAlterTableStatement) { 2798 return; 2799 } 2800 2801 // For CREATE TABLE statements, we need special handling: 2802 // - Regular CREATE TABLE (with column definitions): Preserve constraint columns 2803 // populated during TConstraint.doParse() 2804 // - CTAS (CREATE TABLE AS SELECT): Filter out source columns incorrectly added 2805 // to target table, but preserve the correctly created alias columns 2806 boolean isCreateTable = (stmt instanceof TCreateTableSqlStatement); 2807 if (isCreateTable) { 2808 TCreateTableSqlStatement ctas = (TCreateTableSqlStatement) stmt; 2809 boolean isCTAS = (ctas.getSubQuery() != null); 2810 // For CTAS, filter out source columns from target table's linkedColumns 2811 // The old resolver incorrectly adds source columns (from the SELECT) to the target table 2812 // Keep only columns whose sourceTable is the target table itself 2813 if (isCTAS && ctas.getTargetTable() != null) { 2814 TTable targetTable = ctas.getTargetTable(); 2815 TObjectNameList linkedColumns = targetTable.getLinkedColumns(); 2816 if (linkedColumns != null && linkedColumns.size() > 0) { 2817 // Collect columns to keep (those belonging to target table) 2818 java.util.List<TObjectName> toKeep = new java.util.ArrayList<>(); 2819 for (int i = 0; i < linkedColumns.size(); i++) { 2820 TObjectName col = linkedColumns.getObjectName(i); 2821 if (col != null && col.getSourceTable() == targetTable) { 2822 toKeep.add(col); 2823 } 2824 } 2825 // Clear and re-add only the columns to keep 2826 linkedColumns.clear(); 2827 for (TObjectName col : toKeep) { 2828 linkedColumns.addObjectName(col); 2829 } 2830 } 2831 } 2832 } 2833 2834 if (!isCreateTable && stmt.tables != null) { 2835 // Check if this statement contains a TD_UNPIVOT table 2836 // TD_UNPIVOT populates linkedColumns on its inner table during TTDUnpivot.doParse() 2837 // If we clear linkedColumns here, we lose those column references 2838 boolean hasTDUnpivot = false; 2839 for (int i = 0; i < 
stmt.tables.size(); i++) { 2840 TTable table = stmt.tables.getTable(i); 2841 if (table != null && table.getTableType() == ETableSource.td_unpivot) { 2842 hasTDUnpivot = true; 2843 break; 2844 } 2845 } 2846 2847 for (int i = 0; i < stmt.tables.size(); i++) { 2848 TTable table = stmt.tables.getTable(i); 2849 if (table != null && table.getLinkedColumns() != null) { 2850 // For UNNEST tables, filter out incorrectly linked columns from Phase 1. 2851 // Phase 1 (linkColumnToTable) may have linked external variables to UNNEST 2852 // when it's the only table in scope. Keep only legitimate columns: 2853 // - Implicit column (the UNNEST alias, e.g., "arry_pair" from "UNNEST(...) AS arry_pair") 2854 // - WITH OFFSET column (e.g., "pos" from "WITH OFFSET AS pos") 2855 if (table.getTableType() == ETableSource.unnest) { 2856 filterUnnestLinkedColumns(table); 2857 continue; 2858 } 2859 // Skip TD_UNPIVOT tables - they don't have their own columns but 2860 // TTDUnpivot.doParse() populates columns on the inner table 2861 if (table.getTableType() == ETableSource.td_unpivot) { 2862 continue; 2863 } 2864 // If this statement contains TD_UNPIVOT, skip clearing all tables 2865 // because TD_UNPIVOT populates linkedColumns on inner tables 2866 if (hasTDUnpivot) { 2867 continue; 2868 } 2869 table.getLinkedColumns().clear(); 2870 } 2871 } 2872 } 2873 2874 // Skip recursive processing if this statement contains TD_UNPIVOT 2875 // TD_UNPIVOT's inner table (in the ON clause) has columns populated during parsing 2876 // and those columns need to be preserved 2877 boolean hasTDUnpivot = false; 2878 if (stmt.tables != null) { 2879 for (int i = 0; i < stmt.tables.size(); i++) { 2880 TTable table = stmt.tables.getTable(i); 2881 if (table != null && table.getTableType() == ETableSource.td_unpivot) { 2882 hasTDUnpivot = true; 2883 break; 2884 } 2885 } 2886 } 2887 2888 if (!hasTDUnpivot) { 2889 for (int i = 0; i < stmt.getStatements().size(); i++) { 2890 
clearLinkedColumnsRecursive(stmt.getStatements().get(i), processed); 2891 } 2892 2893 // Also process subqueries within tables - these are NOT in getStatements() 2894 // but are accessed via table.getSubquery() 2895 if (stmt.tables != null) { 2896 for (int i = 0; i < stmt.tables.size(); i++) { 2897 TTable table = stmt.tables.getTable(i); 2898 if (table != null && table.getSubquery() != null) { 2899 clearLinkedColumnsRecursive(table.getSubquery(), processed); 2900 } 2901 } 2902 } 2903 } 2904 } 2905 2906 /** 2907 * Recursively fill TTable.getAttributes() for all tables in a statement. 2908 * Uses namespace data already collected during name resolution. 2909 * 2910 * Processing order is important: 2911 * 1. Process CTEs first 2912 * 2. Process leaf tables (objectname, function, etc.) - not JOIN or subquery 2913 * 3. Process subqueries (recursively) 2914 * 4. Process JOIN tables last (they depend on child tables having attributes) 2915 */ 2916 private void fillTableAttributesRecursive(TCustomSqlStatement stmt, Set<TTable> processedTables) { 2917 if (stmt == null) return; 2918 2919 // Skip DAX statements - they use their own attribute/linkedColumn mechanism 2920 // established during TDaxFunction.doParse() parsing phase. 2921 if (stmt instanceof TDaxStmt) { 2922 return; 2923 } 2924 2925 // Skip ALTER TABLE statements - they use their own linkedColumn mechanism 2926 // established during TAlterTableOption.doParse() parsing phase. 2927 if (stmt instanceof TAlterTableStatement) { 2928 return; 2929 } 2930 2931 // Skip CREATE TABLE statements - they use their own linkedColumn mechanism 2932 // established during TConstraint.doParse() parsing phase. 
2933 if (stmt instanceof TCreateTableSqlStatement) { 2934 return; 2935 } 2936 2937 // Phase 1: Process CTE tables first 2938 if (stmt instanceof TSelectSqlStatement) { 2939 TSelectSqlStatement selectStmt = (TSelectSqlStatement) stmt; 2940 TCTEList cteList = selectStmt.getCteList(); 2941 if (cteList != null) { 2942 for (int i = 0; i < cteList.size(); i++) { 2943 TCTE cte = cteList.getCTE(i); 2944 if (cte != null && cte.getSubquery() != null) { 2945 fillTableAttributesRecursive(cte.getSubquery(), processedTables); 2946 } 2947 } 2948 } 2949 } 2950 2951 // Collect tables by type for proper processing order 2952 List<TTable> leafTables = new ArrayList<>(); 2953 List<TTable> subqueryTables = new ArrayList<>(); 2954 List<TTable> joinTables = new ArrayList<>(); 2955 2956 // First, collect from stmt.tables 2957 if (stmt.tables != null) { 2958 for (int i = 0; i < stmt.tables.size(); i++) { 2959 TTable table = stmt.tables.getTable(i); 2960 if (table == null || processedTables.contains(table)) continue; 2961 2962 switch (table.getTableType()) { 2963 case join: 2964 joinTables.add(table); 2965 // Also collect nested tables within the join 2966 collectNestedJoinTables(table, leafTables, subqueryTables, joinTables, processedTables); 2967 break; 2968 case subquery: 2969 subqueryTables.add(table); 2970 break; 2971 default: 2972 leafTables.add(table); 2973 break; 2974 } 2975 } 2976 } 2977 2978 // Also collect from getRelations() - JOIN tables are often stored there 2979 if (stmt.getRelations() != null) { 2980 for (int i = 0; i < stmt.getRelations().size(); i++) { 2981 IRelation rel = stmt.getRelations().get(i); 2982 if (!(rel instanceof TTable)) continue; 2983 TTable table = (TTable) rel; 2984 if (processedTables.contains(table)) continue; 2985 2986 if (table.getTableType() == ETableSource.join) { 2987 if (!joinTables.contains(table)) { 2988 joinTables.add(table); 2989 // Also collect nested tables within the join 2990 collectNestedJoinTables(table, leafTables, subqueryTables, 
joinTables, processedTables); 2991 } 2992 } 2993 } 2994 } 2995 2996 // Phase 2: Process leaf tables first (objectname, function, xml, etc.) 2997 for (TTable table : leafTables) { 2998 if (!processedTables.contains(table)) { 2999 fillTableAttributes(table, processedTables, stmt); 3000 processedTables.add(table); 3001 } 3002 } 3003 3004 // Phase 3: Process subqueries (recursively process their contents first) 3005 for (TTable table : subqueryTables) { 3006 if (!processedTables.contains(table)) { 3007 if (table.getSubquery() != null) { 3008 fillTableAttributesRecursive(table.getSubquery(), processedTables); 3009 } 3010 fillTableAttributes(table, processedTables, stmt); 3011 processedTables.add(table); 3012 } 3013 } 3014 3015 // Phase 4: Process JOIN tables last (they need child tables to have attributes) 3016 for (TTable table : joinTables) { 3017 if (!processedTables.contains(table)) { 3018 fillTableAttributes(table, processedTables, stmt); 3019 processedTables.add(table); 3020 } 3021 } 3022 3023 // Process nested statements 3024 for (int i = 0; i < stmt.getStatements().size(); i++) { 3025 fillTableAttributesRecursive(stmt.getStatements().get(i), processedTables); 3026 } 3027 } 3028 3029 /** 3030 * Collect nested tables within a JOIN expression. 3031 * This ensures all component tables are processed before the JOIN itself. 
3032 */ 3033 private void collectNestedJoinTables(TTable joinTable, 3034 List<TTable> leafTables, 3035 List<TTable> subqueryTables, 3036 List<TTable> joinTables, 3037 Set<TTable> processedTables) { 3038 if (joinTable == null || joinTable.getJoinExpr() == null) return; 3039 3040 TJoinExpr joinExpr = joinTable.getJoinExpr(); 3041 3042 // Process left table 3043 TTable leftTable = joinExpr.getLeftTable(); 3044 if (leftTable != null && !processedTables.contains(leftTable)) { 3045 switch (leftTable.getTableType()) { 3046 case join: 3047 joinTables.add(leftTable); 3048 collectNestedJoinTables(leftTable, leafTables, subqueryTables, joinTables, processedTables); 3049 break; 3050 case subquery: 3051 subqueryTables.add(leftTable); 3052 break; 3053 default: 3054 leafTables.add(leftTable); 3055 break; 3056 } 3057 } 3058 3059 // Process right table 3060 TTable rightTable = joinExpr.getRightTable(); 3061 if (rightTable != null && !processedTables.contains(rightTable)) { 3062 switch (rightTable.getTableType()) { 3063 case join: 3064 joinTables.add(rightTable); 3065 collectNestedJoinTables(rightTable, leafTables, subqueryTables, joinTables, processedTables); 3066 break; 3067 case subquery: 3068 subqueryTables.add(rightTable); 3069 break; 3070 default: 3071 leafTables.add(rightTable); 3072 break; 3073 } 3074 } 3075 } 3076 3077 /** 3078 * Fill TTable.getAttributes() for a single table using namespace data. 3079 * This converts the namespace's columnSources to TAttributeNode objects. 
3080 * 3081 * @param table The table to fill attributes for 3082 * @param processedTables Set of already processed tables to avoid duplicates 3083 * @param stmt The statement context (used for UNNEST to get the SELECT statement) 3084 */ 3085 private void fillTableAttributes(TTable table, Set<TTable> processedTables, TCustomSqlStatement stmt) { 3086 if (table == null) return; 3087 3088 // Clear existing attributes 3089 table.getAttributes().clear(); 3090 3091 String displayName = table.getDisplayName(true); 3092 if (displayName == null || displayName.isEmpty()) { 3093 displayName = table.getAliasName(); 3094 if (displayName == null || displayName.isEmpty()) { 3095 displayName = table.getName(); 3096 } 3097 } 3098 3099 // First, try to use existing namespace from ScopeBuildResult 3100 // Skip namespace lookup for UNNEST tables - they need special handling via initAttributesForUnnest 3101 INamespace existingNamespace = null; 3102 if (table.getTableType() != ETableSource.unnest) { 3103 existingNamespace = scopeBuildResult != null 3104 ? 
scopeBuildResult.getNamespaceForTable(table) 3105 : null; 3106 } 3107 3108 if (existingNamespace != null) { 3109 // Use existing namespace's column sources 3110 // Returns false if namespace has no real metadata (only inferred columns) 3111 if (fillAttributesFromNamespace(table, existingNamespace, displayName)) { 3112 return; 3113 } 3114 // Fall through to legacy logic if no real metadata 3115 } 3116 3117 // Fall back to type-specific handling if no namespace found 3118 switch (table.getTableType()) { 3119 case objectname: 3120 if (table.isCTEName()) { 3121 // CTE reference - use initAttributesFromCTE 3122 TCTE cte = table.getCTE(); 3123 if (cte != null) { 3124 table.initAttributesFromCTE(cte); 3125 } 3126 } else { 3127 // Physical table - create TableNamespace and extract columns 3128 fillPhysicalTableAttributes(table, displayName); 3129 } 3130 break; 3131 3132 case subquery: 3133 // Subquery - use initAttributesFromSubquery 3134 if (table.getSubquery() != null) { 3135 String prefix = ""; 3136 if (table.getAliasClause() != null) { 3137 prefix = table.getAliasClause().toString() + "."; 3138 } 3139 table.initAttributesFromSubquery(table.getSubquery(), prefix); 3140 } 3141 break; 3142 3143 case join: 3144 // JOIN - combine attributes from left and right tables 3145 // First, add USING columns to the left and right tables (if present) 3146 if (table.getJoinExpr() != null) { 3147 addUsingColumnsToTables(table.getJoinExpr()); 3148 // Then initialize the join expression's attributes (which pulls from left/right tables) 3149 table.getJoinExpr().initAttributes(0); 3150 } 3151 table.initAttributesForJoin(); 3152 break; 3153 3154 case function: 3155 // Table function 3156 table.initAttributeForTableFunction(); 3157 break; 3158 3159 case xmltable: 3160 // XML table 3161 table.initAttributeForXMLTable(); 3162 break; 3163 3164 case tableExpr: 3165 // Table expression 3166 TAttributeNode.addNodeToList( 3167 new TAttributeNode(displayName + ".*", table), 3168 
table.getAttributes() 3169 ); 3170 break; 3171 3172 case rowList: 3173 // Row list 3174 table.initAttributeForRowList(); 3175 break; 3176 3177 case unnest: 3178 // UNNEST - initialize attributes using the SELECT statement context 3179 if (stmt instanceof TSelectSqlStatement) { 3180 TSelectSqlStatement select = (TSelectSqlStatement) stmt; 3181 table.initAttributesForUnnest(getSqlEnv(), select); 3182 } 3183 break; 3184 3185 case pivoted_table: 3186 // PIVOT table 3187 table.initAttributesForPivotTable(); 3188 break; 3189 } 3190 } 3191 3192 /** 3193 * Fill table attributes from an existing namespace's column sources. 3194 * This uses the namespace data that was collected during ScopeBuilder traversal. 3195 * 3196 * @return true if attributes were successfully filled, false if should fall back to legacy logic 3197 */ 3198 private boolean fillAttributesFromNamespace(TTable table, INamespace namespace, String displayName) { 3199 // Ensure namespace is validated 3200 if (!namespace.isValidated()) { 3201 namespace.validate(); 3202 } 3203 3204 // For TableNamespace without actual metadata (only inferred columns), 3205 // return false to fall back to legacy logic which uses wildcards 3206 if (namespace instanceof TableNamespace) { 3207 TableNamespace tableNs = (TableNamespace) namespace; 3208 // Check if the namespace has actual metadata by seeing if there are any columns 3209 // with high confidence from metadata sources (not inferred) 3210 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 3211 boolean hasRealMetadata = false; 3212 for (ColumnSource source : columnSources.values()) { 3213 if (source.getConfidence() >= 1.0 && 3214 !("inferred_from_usage".equals(source.getEvidence()))) { 3215 hasRealMetadata = true; 3216 break; 3217 } 3218 } 3219 if (!hasRealMetadata) { 3220 // No real metadata, fall back to legacy logic with wildcards 3221 return false; 3222 } 3223 3224 // Has metadata - use namespace columns 3225 for (Map.Entry<String, ColumnSource> 
entry : columnSources.entrySet()) { 3226 String colName = entry.getKey(); 3227 ColumnSource source = entry.getValue(); 3228 // Only include columns with real metadata, not inferred ones 3229 if (source.getConfidence() >= 1.0 && 3230 !("inferred_from_usage".equals(source.getEvidence()))) { 3231 TAttributeNode.addNodeToList( 3232 new TAttributeNode(displayName + "." + colName, table), 3233 table.getAttributes() 3234 ); 3235 } 3236 } 3237 3238 // If no columns after filtering, add wildcard 3239 if (table.getAttributes().isEmpty()) { 3240 TAttributeNode.addNodeToList( 3241 new TAttributeNode(displayName + ".*", table), 3242 table.getAttributes() 3243 ); 3244 } 3245 return true; 3246 } 3247 3248 // For other namespace types (SubqueryNamespace, CTENamespace, etc.), 3249 // use all column sources 3250 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 3251 if (columnSources != null && !columnSources.isEmpty()) { 3252 for (Map.Entry<String, ColumnSource> entry : columnSources.entrySet()) { 3253 String colName = entry.getKey(); 3254 TAttributeNode.addNodeToList( 3255 new TAttributeNode(displayName + "." + colName, table), 3256 table.getAttributes() 3257 ); 3258 } 3259 } 3260 3261 // If no columns found, add wildcard attribute 3262 if (table.getAttributes().isEmpty()) { 3263 TAttributeNode.addNodeToList( 3264 new TAttributeNode(displayName + ".*", table), 3265 table.getAttributes() 3266 ); 3267 } 3268 return true; 3269 } 3270 3271 /** 3272 * Fill attributes for a physical table using TableNamespace. 3273 */ 3274 private void fillPhysicalTableAttributes(TTable table, String displayName) { 3275 // Create namespace for this table with sqlEnv and vendor for qualified name resolution 3276 TSQLEnv sqlEnv = globalContext != null ? globalContext.getSqlEnv() : null; 3277 EDbVendor vendor = table.dbvendor != null ? 
table.dbvendor : EDbVendor.dbvoracle; 3278 TableNamespace namespace = new TableNamespace(table, config.getNameMatcher(), sqlEnv, vendor); 3279 3280 // Validate to populate columnSources 3281 namespace.validate(); 3282 3283 // Convert columnSources to TAttributeNode 3284 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 3285 if (columnSources != null && !columnSources.isEmpty()) { 3286 for (Map.Entry<String, ColumnSource> entry : columnSources.entrySet()) { 3287 String colName = entry.getKey(); 3288 TAttributeNode.addNodeToList( 3289 new TAttributeNode(displayName + "." + colName, table), 3290 table.getAttributes() 3291 ); 3292 } 3293 } 3294 3295 // If no columns found from metadata, add wildcard attribute 3296 // (this allows any column to potentially match) 3297 if (table.getAttributes().isEmpty()) { 3298 // Add columns from linkedColumns if available 3299 if (table.getLinkedColumns() != null && table.getLinkedColumns().size() > 0) { 3300 for (TObjectName col : table.getLinkedColumns()) { 3301 if (col.getCandidateTables() != null && col.getCandidateTables().size() > 1) { 3302 continue; // Skip ambiguous columns 3303 } 3304 TAttributeNode.addNodeToList( 3305 new TAttributeNode(displayName + "." + col.getColumnNameOnly(), table), 3306 table.getAttributes() 3307 ); 3308 } 3309 } 3310 // Add wildcard attribute 3311 TAttributeNode.addNodeToList( 3312 new TAttributeNode(displayName + ".*", table), 3313 table.getAttributes() 3314 ); 3315 } 3316 } 3317 3318 /** 3319 * Add USING columns to the left and right tables in a JOIN expression. 3320 * USING columns should appear in both tables' attribute lists before the wildcard. 3321 * This method recursively handles nested JOINs. 
3322 */ 3323 private void addUsingColumnsToTables(TJoinExpr joinExpr) { 3324 if (joinExpr == null) return; 3325 3326 // Recursively handle nested joins 3327 TTable leftTable = joinExpr.getLeftTable(); 3328 TTable rightTable = joinExpr.getRightTable(); 3329 3330 if (leftTable != null && leftTable.getTableType() == ETableSource.join && leftTable.getJoinExpr() != null) { 3331 addUsingColumnsToTables(leftTable.getJoinExpr()); 3332 } 3333 if (rightTable != null && rightTable.getTableType() == ETableSource.join && rightTable.getJoinExpr() != null) { 3334 addUsingColumnsToTables(rightTable.getJoinExpr()); 3335 } 3336 3337 // Handle USING columns in this join 3338 gudusoft.gsqlparser.nodes.TObjectNameList usingColumns = joinExpr.getUsingColumns(); 3339 if (usingColumns == null || usingColumns.size() == 0) return; 3340 3341 // Add USING columns to both tables 3342 for (int i = 0; i < usingColumns.size(); i++) { 3343 TObjectName usingCol = usingColumns.getObjectName(i); 3344 if (usingCol == null) continue; 3345 String colName = usingCol.getColumnNameOnly(); 3346 3347 // Add to left table (insert before wildcard if possible) 3348 if (leftTable != null && leftTable.getTableType() != ETableSource.join) { 3349 addColumnAttributeBeforeWildcard(leftTable, colName); 3350 } 3351 3352 // Add to right table (insert before wildcard if possible) 3353 if (rightTable != null && rightTable.getTableType() != ETableSource.join) { 3354 addColumnAttributeBeforeWildcard(rightTable, colName); 3355 } 3356 } 3357 } 3358 3359 /** 3360 * Add a column attribute to a table, inserting before the wildcard (*) if present. 3361 * This ensures USING columns appear before the wildcard in the attribute list. 
*/
    private void addColumnAttributeBeforeWildcard(TTable table, String columnName) {
        // Nothing to do without both a table and a column name
        if (table == null || columnName == null) return;

        // Prefix preference: display name, then alias, then plain table name
        String displayName = table.getDisplayName(true);
        if (displayName == null || displayName.isEmpty()) {
            displayName = table.getAliasName();
            if (displayName == null || displayName.isEmpty()) {
                displayName = table.getName();
            }
        }

        String attrName = displayName + "." + columnName;

        // Check if attribute already exists (case-insensitive name comparison)
        ArrayList<TAttributeNode> attrs = table.getAttributes();
        for (TAttributeNode attr : attrs) {
            if (attr.getName().equalsIgnoreCase(attrName)) {
                return; // Already exists
            }
        }

        // Find the wildcard position (first attribute whose name ends with ".*")
        int wildcardIndex = -1;
        for (int i = 0; i < attrs.size(); i++) {
            if (attrs.get(i).getName().endsWith(".*")) {
                wildcardIndex = i;
                break;
            }
        }

        // Insert before wildcard or add to end
        TAttributeNode newAttr = new TAttributeNode(attrName, table);
        if (wildcardIndex >= 0) {
            attrs.add(wildcardIndex, newAttr);
        } else {
            TAttributeNode.addNodeToList(newAttr, attrs);
        }
    }

    /**
     * Sync a single column to legacy structures (TTable.linkedColumns,
     * TObjectName.sourceTable, linkedColumnDef and sourceColumn).
     *
     * <p>Processing order:
     * <ol>
     *   <li>Star columns with a non-empty sourceTableList are linked to every
     *       non-subquery table in that list and the method returns.</li>
     *   <li>AMBIGUOUS non-star columns have their sourceTable cleared and are not linked.</li>
     *   <li>Columns resolved through a PlsqlVariableNamespace are marked as variables
     *       and are not linked.</li>
     *   <li>The sourceTable is corrected or derived from the ColumnSource when needed.</li>
     *   <li>The column is linked to its sourceTable and, where applicable, to UNION
     *       branch tables, the CTE reference table and the underlying physical table.</li>
     *   <li>linkedColumnDef / sourceColumn are copied from the ColumnSource definition node.</li>
     * </ol>
     *
     * @param column the column reference to sync; may be null
     * @return for a star column with a non-empty sourceTableList, true only if the
     *         column was newly added to at least one table's linkedColumns; otherwise
     *         false when the column is null, AMBIGUOUS (non-star), a PL/SQL variable,
     *         or no source table could be determined, and true in all remaining cases
     */
    private boolean syncColumnToLegacy(TObjectName column) {
        if (column == null) return false;

        // Special handling for star columns (SELECT *)
        // Star columns represent ALL tables in the FROM clause and should be synced to ALL tables
        // in their sourceTableList, not just the first one.
        String columnName = column.getColumnNameOnly();
        if (columnName != null && columnName.equals("*")) {
            java.util.ArrayList<TTable> sourceTableList = column.getSourceTableList();
            if (sourceTableList != null && sourceTableList.size() > 0) {
                boolean synced = false;
                for (TTable starTable : sourceTableList) {
                    if (starTable == null) continue;
                    // Skip subquery types - the star should be linked to physical tables
                    if (starTable.getTableType() == ETableSource.subquery) continue;
                    gudusoft.gsqlparser.nodes.TObjectNameList starLinkedColumns = starTable.getLinkedColumns();
                    if (starLinkedColumns != null && !containsColumn(starLinkedColumns, column)) {
                        starLinkedColumns.addObjectName(column);
                        synced = true;
                        if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                            logInfo("syncColumnToLegacy: Synced star column to sourceTableList table: "
                                + starTable.getTableName());
                        }
                    }
                }
                // Star columns with a sourceTableList never reach the generic path below
                return synced;
            }
        }

        // Check if column is AMBIGUOUS - don't sync to legacy if it's ambiguous
        // Ambiguous columns should be added to orphanColumns, not linkedColumns
        // NOTE: Skip this check for star columns (*) since they are handled specially
        // via sourceTableList and should be linked to all tables in the FROM clause
        ResolutionResult resolution = column.getResolution();
        if (resolution != null && resolution.getStatus() == ResolutionStatus.AMBIGUOUS) {
            // Don't treat star columns as ambiguous - they're supposed to match all tables
            // (only reachable for a star column whose sourceTableList was null or empty)
            if (columnName != null && columnName.equals("*")) {
                if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                    logInfo("syncColumnToLegacy: Star column has AMBIGUOUS status, proceeding with normal sync");
                }
                // Fall through to normal processing
            } else {
                if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                    logInfo("syncColumnToLegacy: Skipping AMBIGUOUS column: " + column.toString()
                        + " with " + (resolution.getAmbiguousSource() != null ?
                            resolution.getAmbiguousSource().getCandidateCount() : 0) + " candidates");
                }
                // Clear sourceTable if it was set by Phase 1 (linkColumnToTable)
                // This ensures the column will be treated as orphan by TGetTableColumn
                if (column.getSourceTable() != null) {
                    column.setSourceTable(null);
                }
                return false;
            }
        }

        TTable sourceTable = column.getSourceTable();
        ColumnSource source = column.getColumnSource();

        // Handle columns resolved through PlsqlVariableNamespace
        // These are stored procedure variables/parameters - mark them as variables
        // so they won't be added to orphan columns
        if (source != null && source.getSourceNamespace() instanceof gudusoft.gsqlparser.resolver2.namespace.PlsqlVariableNamespace) {
            column.setDbObjectTypeDirectly(EDbObjectType.variable);
            // Variables don't need to be linked to tables
            return false;
        }

        // Fix for subquery columns: When a column is EXPLICITLY QUALIFIED with a subquery alias
        // (e.g., mm.material_id), the old resolver Phase 1 may have incorrectly set sourceTable
        // to the physical table inside the subquery. TSQLResolver2 should correct this to point
        // to the subquery TTable itself. This preserves the intermediate layer for data lineage:
        //   mm.material_id -> subquery mm -> physical table
        //
        // IMPORTANT: Only apply this correction for QUALIFIED columns. Unqualified columns
        // (like those inferred from star column expansion) should keep their physical table
        // sourceTable for proper data lineage tracing.
        if (source != null && column.isQualified()) {
            INamespace ns = source.getSourceNamespace();
            if (ns instanceof SubqueryNamespace) {
                TTable subqueryTable = ns.getSourceTable();
                // If the subquery's TTable is different from the current sourceTable,
                // use the subquery's TTable to maintain proper semantic layering
                if (subqueryTable != null && subqueryTable != sourceTable) {
                    if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                        logInfo("syncColumnToLegacy: Correcting sourceTable from " +
                            (sourceTable != null ? sourceTable.getTableName() : "null") +
                            " to subquery " + subqueryTable.getTableName() + " for qualified column " + column.toString());
                    }
                    sourceTable = subqueryTable;
                    column.setSourceTable(sourceTable);
                }
            }
        }

        // If sourceTable is null, try to get it from ColumnSource
        // This handles columns resolved to derived tables (subqueries with aliases)
        // where TSQLResolver2 resolved via ColumnSource but didn't set sourceTable on TObjectName
        // Lookup order: immediate namespace table (alias columns only), getFinalTable(),
        // first non-subquery entry of getAllFinalTables(), then getOverrideTable().
        if (sourceTable == null && source != null) {
            // For alias columns (isColumnAlias) or passthroughs to aliases (getFinalColumnName != null),
            // prefer the immediate source table (subquery/CTE) over the traced physical table.
            // The alias name doesn't exist in the physical table, so linking with alias name is wrong.
            boolean isAliasColumn = source.isColumnAlias() || source.getFinalColumnName() != null;
            if (isAliasColumn) {
                INamespace ns = source.getSourceNamespace();
                if (ns != null) {
                    TTable immediateTable = ns.getSourceTable();
                    if (immediateTable != null) {
                        sourceTable = immediateTable;
                        column.setSourceTable(sourceTable);
                        if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                            logInfo("syncColumnToLegacy: Set sourceTable to immediate source for alias column "
                                + column.toString() + " -> " + immediateTable.getTableName());
                        }
                    }
                }
            }
            if (sourceTable == null) {
                TTable finalTable = source.getFinalTable();
                if (finalTable != null) {
                    sourceTable = finalTable;
                    column.setSourceTable(sourceTable);
                    if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                        logInfo("syncColumnToLegacy: Set sourceTable from ColumnSource.getFinalTable() for "
                            + column.toString() + " -> " + finalTable.getTableName());
                    }
                } else {
                    // Try getAllFinalTables() - this may succeed when getFinalTable() returns null
                    // For example, columns inferred through star push-down may have overrideTable set
                    // which getAllFinalTables() will return as a single-element list
                    java.util.List<TTable> allFinalTables = source.getAllFinalTables();
                    if (allFinalTables != null && !allFinalTables.isEmpty()) {
                        // Use the first non-subquery table from allFinalTables
                        for (TTable candidateTable : allFinalTables) {
                            if (candidateTable != null && candidateTable.getTableType() != ETableSource.subquery) {
                                sourceTable = candidateTable;
                                column.setSourceTable(sourceTable);
                                if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                                    logInfo("syncColumnToLegacy: Set sourceTable from ColumnSource.getAllFinalTables() for "
                                        + column.toString() + " -> " + candidateTable.getTableName());
                                }
                                break;
                            }
                        }
                    }

                    // Fallback: try overrideTable for cases like derived tables in JOIN ON clauses
                    if (sourceTable == null) {
                        TTable overrideTable = source.getOverrideTable();
                        if (overrideTable != null) {
                            sourceTable = overrideTable;
                            column.setSourceTable(sourceTable);
                            if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                                logInfo("syncColumnToLegacy: Set sourceTable from ColumnSource.getOverrideTable() for "
                                    + column.toString() + " -> " + overrideTable.getTableName());
                            }
                        }
                    }
                }
            }
        }

        // Without a source table there is nothing to link the column to
        if (sourceTable == null) {
            if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && source != null) {
                logInfo("syncColumnToLegacy: Column " + column.toString()
                    + " has ColumnSource but no table. Namespace: "
                    + (source.getSourceNamespace() != null ? source.getSourceNamespace().getClass().getSimpleName() : "null")
                    + ", evidence: " + source.getEvidence());
            }
            return false;
        }

        // For struct-field access (e.g., customer.customer_id in BigQuery),
        // create a synthetic column representing the base column (e.g., "customer")
        // instead of using the original column which has the field name (e.g., "customer_id")
        if (source != null && source.isStructFieldAccess()) {
            String baseColumnName = source.getExposedName();
            if (baseColumnName != null && !baseColumnName.isEmpty()) {
                // Create synthetic TObjectName for the base column
                EDbVendor vendor = config != null ? config.getVendor() : EDbVendor.dbvbigquery;
                TObjectName baseColumn = TObjectName.createObjectName(
                    vendor, EDbObjectType.column, baseColumnName);
                baseColumn.setSourceTable(sourceTable);

                // Add the base column to linkedColumns (avoid duplicates by name)
                gudusoft.gsqlparser.nodes.TObjectNameList linkedColumns = sourceTable.getLinkedColumns();
                if (linkedColumns != null && !containsColumnByName(linkedColumns, baseColumnName)) {
                    linkedColumns.addObjectName(baseColumn);
                }
                return true; // Skip adding the original struct-qualified column to linkedColumns.
                // DataFlowAnalyzer uses FieldPath from the original TObjectName to match
                // against the synthetic base column via getStructFieldFullName().
            }
        }

        // 1. Add to TTable.linkedColumns (avoid duplicates)
        gudusoft.gsqlparser.nodes.TObjectNameList linkedColumns = sourceTable.getLinkedColumns();
        if (linkedColumns != null && !containsColumn(linkedColumns, column)) {
            linkedColumns.addObjectName(column);
        }

        // 2. For UNION scenarios, also add to all final tables from UNION branches
        // This is critical for star column push-down tests that expect columns to be
        // linked to ALL tables in a UNION, not just the first one.
        if (source != null) {
            java.util.List<TTable> allFinalTables = source.getAllFinalTables();
            if (allFinalTables != null && allFinalTables.size() > 1) {
                for (TTable unionTable : allFinalTables) {
                    if (unionTable == null || unionTable == sourceTable) continue;
                    // Skip subquery types - only link to physical tables
                    if (unionTable.getTableType() == ETableSource.subquery) continue;
                    gudusoft.gsqlparser.nodes.TObjectNameList unionLinkedColumns = unionTable.getLinkedColumns();
                    if (unionLinkedColumns != null && !containsColumn(unionLinkedColumns, column)) {
                        unionLinkedColumns.addObjectName(column);
                    }
                }
            }

            // 2b. For CTE columns, also link to the CTE reference table
            // When a column is resolved through a CTE, it should be linked to both:
            // - The CTE reference table (immediate source)
            // - The underlying physical tables (final source)
            INamespace ns = source.getSourceNamespace();
            if (ns instanceof gudusoft.gsqlparser.resolver2.namespace.CTENamespace) {
                gudusoft.gsqlparser.resolver2.namespace.CTENamespace cteNs =
                    (gudusoft.gsqlparser.resolver2.namespace.CTENamespace) ns;
                TTable cteTable = cteNs.getReferencingTable();
                if (cteTable != null && cteTable != sourceTable) {
                    gudusoft.gsqlparser.nodes.TObjectNameList cteLinkedColumns = cteTable.getLinkedColumns();
                    if (cteLinkedColumns != null && !containsColumn(cteLinkedColumns, column)) {
                        cteLinkedColumns.addObjectName(column);
                    }
                }
            }

            // 2c. For subquery columns, also link to the underlying physical tables
            // When sourceTable is a subquery (e.g., qualified column S.id from MERGE USING subquery),
            // TGetTableColumn needs the column to be linked to physical tables for output.
            // Use getFinalTable() to trace through to the ultimate physical table.
            // IMPORTANT: Only link if a column with the same name doesn't already exist -
            // this avoids duplicates when both outer and inner queries reference the same column.
            // EXCEPTION: Skip MERGE ON clause columns - they should not be linked to the source
            // subquery's underlying table because they may belong to the target table instead.
            if (sourceTable.getTableType() == ETableSource.subquery) {
                // Skip UNQUALIFIED join condition columns - they should not be traced to the source
                // subquery's underlying table via star column expansion.
                // This is particularly important for MERGE ON clause columns which may
                // belong to the target table rather than the source subquery.
                // QUALIFIED columns (like S.id) should still be traced as they explicitly reference
                // the source subquery.
                // Note: We check location only because ownStmt may be null for unresolved columns.
                boolean isUnqualifiedJoinConditionColumn = (column.getLocation() == ESqlClause.joinCondition)
                    && (column.getTableString() == null || column.getTableString().isEmpty());
                if (isUnqualifiedJoinConditionColumn && TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                    logInfo("syncColumnToLegacy: Skipping unqualified join condition column " + column.toString() +
                        " - should not be traced to subquery's underlying table");
                }

                // Skip alias columns - the alias name doesn't exist in the physical table,
                // so linking an alias-named column to the physical table produces wrong output
                // (e.g., TestTableEmployee.name instead of TestTableEmployee.ename).
                // getFinalTable() traces through aliases to find the physical table, but the
                // column name is still the alias. Only non-alias columns should be linked.
                boolean isAliasColumnForLinking = source.isColumnAlias() || source.getFinalColumnName() != null;

                if (!isUnqualifiedJoinConditionColumn && !isAliasColumnForLinking) {
                    TTable finalTable = source.getFinalTable();
                    if (finalTable != null && finalTable != sourceTable &&
                        finalTable.getTableType() != ETableSource.subquery) {
                        gudusoft.gsqlparser.nodes.TObjectNameList finalLinkedColumns = finalTable.getLinkedColumns();
                        // Both the identity check and the by-name check must pass before linking
                        if (finalLinkedColumns != null && !containsColumn(finalLinkedColumns, column)
                            && !containsColumnByName(finalLinkedColumns, column.getColumnNameOnly())) {
                            finalLinkedColumns.addObjectName(column);
                            if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                                logInfo("syncColumnToLegacy: Also linked " + column.toString() +
                                    " to underlying physical table " + finalTable.getTableName());
                            }
                        }
                    }
                }
            }

        }

        // 3. Sync linkedColumnDef and sourceColumn from ColumnSource
        if (source != null) {
            Object defNode = source.getDefinitionNode();

            // Set linkedColumnDef if definition is a TColumnDefinition
            if (defNode instanceof gudusoft.gsqlparser.nodes.TColumnDefinition) {
                column.setLinkedColumnDef((gudusoft.gsqlparser.nodes.TColumnDefinition) defNode);
            }

            // Set sourceColumn if definition is a TResultColumn
            // BUT skip for CTE explicit columns - these reference the CTE column name (e.g., "mgr_dept")
            // not the underlying SELECT column (e.g., "grp"). The CTE column is a TObjectName,
            // not a TResultColumn, so we cannot set it as sourceColumn.
            if (defNode instanceof TResultColumn) {
                String evidence = source.getEvidence();
                boolean isCTEExplicitColumn = evidence != null && evidence.startsWith("cte_explicit_column");
                if (!isCTEExplicitColumn) {
                    column.setSourceColumn((TResultColumn) defNode);
                }
            }
            // Special case: for star-inferred columns, set sourceColumn to the star column
            // The definitionNode is intentionally null to avoid affecting formatter output,
            // but we still need to set sourceColumn for legacy API compatibility.
            // Use setSourceColumnOnly to avoid changing dbObjectType which affects filtering.
            else if (defNode == null && source.getEvidence() != null
                    && source.getEvidence().contains("auto_inferred")) {
                // This is a star-inferred column - get the star column from the namespace
                INamespace namespace = source.getSourceNamespace();
                if (namespace != null) {
                    TResultColumn starColumn = namespace.getStarColumn();
                    if (starColumn != null) {
                        column.setSourceColumnOnly(starColumn);
                    }
                }
            }
        }

        return true;
    }

    /**
     * Check if a column already exists in the list (by identity).
3732 */ 3733 private boolean containsColumn(gudusoft.gsqlparser.nodes.TObjectNameList list, TObjectName column) { 3734 for (int i = 0; i < list.size(); i++) { 3735 if (list.getObjectName(i) == column) { 3736 return true; 3737 } 3738 } 3739 return false; 3740 } 3741 3742 /** 3743 * Check if a column with the given name already exists in the list. 3744 * Used for struct-field access where we create synthetic columns. 3745 */ 3746 private boolean containsColumnByName(gudusoft.gsqlparser.nodes.TObjectNameList list, String columnName) { 3747 if (columnName == null) return false; 3748 // Normalize by stripping quotes for comparison 3749 String normalizedName = stripQuotes(columnName); 3750 for (int i = 0; i < list.size(); i++) { 3751 TObjectName col = list.getObjectName(i); 3752 if (col != null) { 3753 String existingName = stripQuotes(col.getColumnNameOnly()); 3754 if (normalizedName.equalsIgnoreCase(existingName)) { 3755 return true; 3756 } 3757 } 3758 } 3759 return false; 3760 } 3761 3762 /** 3763 * Strip leading/trailing quote characters from a string. 3764 */ 3765 private String stripQuotes(String s) { 3766 if (s == null) return null; 3767 if (s.length() >= 2) { 3768 char first = s.charAt(0); 3769 char last = s.charAt(s.length() - 1); 3770 if ((first == '"' && last == '"') || 3771 (first == '\'' && last == '\'') || 3772 (first == '`' && last == '`') || 3773 (first == '[' && last == ']')) { 3774 return s.substring(1, s.length() - 1); 3775 } 3776 } 3777 return s; 3778 } 3779 3780 /** 3781 * Check if a subquery SELECT statement has an explicit (non-star) column with the given name. 
3782 * This is used to determine whether to create traced column clones: 3783 * - If the column matches an explicit column in the subquery, don't clone (stays at subquery level) 3784 * - If the column doesn't match explicit columns (must come from star), clone to physical table 3785 * 3786 * @param subquery the SELECT statement to check 3787 * @param columnName the column name to look for (may have quotes) 3788 * @return true if the subquery has an explicit column matching the name 3789 */ 3790 private boolean subqueryHasExplicitColumn(TSelectSqlStatement subquery, String columnName) { 3791 if (subquery == null || columnName == null) { 3792 return false; 3793 } 3794 3795 // For combined queries (UNION/INTERSECT/EXCEPT), follow left chain iteratively 3796 TSelectSqlStatement current = subquery; 3797 while (current.isCombinedQuery()) { 3798 current = current.getLeftStmt(); 3799 if (current == null) { 3800 return false; 3801 } 3802 } 3803 subquery = current; 3804 3805 TResultColumnList resultColumns = subquery.getResultColumnList(); 3806 if (resultColumns == null) { 3807 return false; 3808 } 3809 3810 // Normalize the column name for comparison (strip quotes) 3811 String normalizedName = stripQuotes(columnName); 3812 3813 for (int i = 0; i < resultColumns.size(); i++) { 3814 TResultColumn rc = resultColumns.getResultColumn(i); 3815 if (rc == null) { 3816 continue; 3817 } 3818 3819 String colStr = rc.toString(); 3820 // Skip star columns - they're not explicit columns 3821 if (colStr != null && (colStr.equals("*") || colStr.endsWith(".*"))) { 3822 continue; 3823 } 3824 3825 // Get the effective column name (alias if present, otherwise the column name) 3826 String effectiveName = null; 3827 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 3828 effectiveName = rc.getAliasClause().getAliasName().toString(); 3829 } else if (rc.getExpr() != null && rc.getExpr().getObjectOperand() != null) { 3830 // For simple column references like "t1.COL1", 
get the column name 3831 effectiveName = rc.getExpr().getObjectOperand().getColumnNameOnly(); 3832 } 3833 3834 if (effectiveName != null) { 3835 String normalizedEffective = stripQuotes(effectiveName); 3836 if (normalizedName.equalsIgnoreCase(normalizedEffective)) { 3837 return true; 3838 } 3839 } 3840 } 3841 3842 return false; 3843 } 3844 3845 /** 3846 * Expand star columns using push-down inferred columns from namespaces. 3847 * 3848 * This is the core of the star column push-down algorithm: 3849 * 1. Find all star columns in SELECT lists 3850 * 2. For each star column, find its source namespace(s) 3851 * 3. Get inferred columns from the namespace (collected during resolution) 3852 * 4. Expand the star column by populating attributeNodesDerivedFromFromClause 3853 * 3854 * This enables star column expansion without TSQLEnv metadata by using 3855 * columns referenced in outer queries to infer what the star expands to. 3856 */ 3857 private void expandStarColumnsUsingPushDown() { 3858 int expandedCount = 0; 3859 Set<TCustomSqlStatement> processedStmts = new HashSet<>(); 3860 3861 // Track expanded star columns by their string representation for syncing 3862 Map<String, ArrayList<TAttributeNode>> expandedStarCols = new HashMap<>(); 3863 3864 // Process all statements recursively 3865 for (int i = 0; i < sqlStatements.size(); i++) { 3866 expandedCount += expandStarColumnsInStatement(sqlStatements.get(i), processedStmts, expandedStarCols); 3867 } 3868 3869 // Sync expanded attributes to column references in getAllColumnReferences() 3870 // The result column TObjectNames might be different instances than those collected 3871 // during scope building, so we need to copy the expanded attrs 3872 if (scopeBuildResult != null && !expandedStarCols.isEmpty()) { 3873 for (TObjectName colRef : scopeBuildResult.getAllColumnReferences()) { 3874 if (colRef == null) continue; 3875 String colStr = colRef.toString(); 3876 if (colStr == null || !colStr.endsWith("*")) continue; 3877 3878 
// Skip if already has expanded attrs 3879 ArrayList<TAttributeNode> existingAttrs = colRef.getAttributeNodesDerivedFromFromClause(); 3880 if (existingAttrs != null && !existingAttrs.isEmpty()) continue; 3881 3882 // Find matching expanded star column 3883 ArrayList<TAttributeNode> expandedAttrs = expandedStarCols.get(colStr); 3884 if (expandedAttrs != null && !expandedAttrs.isEmpty()) { 3885 // Copy the expanded attrs to this column reference 3886 for (TAttributeNode attr : expandedAttrs) { 3887 TAttributeNode.addNodeToList(attr, existingAttrs); 3888 } 3889 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 3890 logInfo("Synced " + expandedAttrs.size() + " expanded attrs to column reference: " + colStr); 3891 } 3892 } 3893 } 3894 } 3895 3896 logInfo("Expanded star columns using push-down: " + expandedCount + " columns added"); 3897 } 3898 3899 /** 3900 * Recursively expand star columns in a statement and its nested statements. 3901 * Uses processedStmts to track ALL statements (not just SELECTs) to prevent infinite loops. 
3902 */ 3903 private int expandStarColumnsInStatement(TCustomSqlStatement stmt, Set<TCustomSqlStatement> processedStmts, 3904 Map<String, ArrayList<TAttributeNode>> expandedStarCols) { 3905 if (stmt == null) return 0; 3906 3907 // Cycle detection: skip if already processed this statement 3908 if (processedStmts.contains(stmt)) { 3909 return 0; 3910 } 3911 processedStmts.add(stmt); 3912 3913 int count = 0; 3914 3915 // Handle SELECT statements 3916 if (stmt instanceof TSelectSqlStatement) { 3917 TSelectSqlStatement select = (TSelectSqlStatement) stmt; 3918 count += expandStarColumnsInSelect(select, expandedStarCols); 3919 3920 // Handle UNION/INTERSECT/EXCEPT - iteratively collect all branches 3921 if (select.isCombinedQuery()) { 3922 Deque<TSelectSqlStatement> unionStack = new ArrayDeque<>(); 3923 if (select.getLeftStmt() != null) unionStack.push(select.getLeftStmt()); 3924 if (select.getRightStmt() != null) unionStack.push(select.getRightStmt()); 3925 while (!unionStack.isEmpty()) { 3926 TSelectSqlStatement branch = unionStack.pop(); 3927 if (branch == null || processedStmts.contains(branch)) continue; 3928 processedStmts.add(branch); 3929 count += expandStarColumnsInSelect(branch, expandedStarCols); 3930 if (branch.isCombinedQuery()) { 3931 if (branch.getLeftStmt() != null) unionStack.push(branch.getLeftStmt()); 3932 if (branch.getRightStmt() != null) unionStack.push(branch.getRightStmt()); 3933 } else { 3934 // Process tables with subqueries in this branch 3935 if (branch.tables != null) { 3936 for (int i = 0; i < branch.tables.size(); i++) { 3937 TTable table = branch.tables.getTable(i); 3938 if (table != null && table.getSubquery() != null) { 3939 count += expandStarColumnsInStatement(table.getSubquery(), processedStmts, expandedStarCols); 3940 } 3941 } 3942 } 3943 if (branch.getCteList() != null) { 3944 for (int i = 0; i < branch.getCteList().size(); i++) { 3945 TCTE cte = branch.getCteList().getCTE(i); 3946 if (cte != null && cte.getSubquery() != null) { 
3947 count += expandStarColumnsInStatement(cte.getSubquery(), processedStmts, expandedStarCols); 3948 } 3949 } 3950 } 3951 } 3952 } 3953 } 3954 } 3955 3956 // Handle MERGE statements specially - process the USING clause 3957 if (stmt instanceof gudusoft.gsqlparser.stmt.TMergeSqlStatement) { 3958 gudusoft.gsqlparser.stmt.TMergeSqlStatement merge = (gudusoft.gsqlparser.stmt.TMergeSqlStatement) stmt; 3959 TTable usingTable = merge.getUsingTable(); 3960 if (usingTable != null && usingTable.getSubquery() != null) { 3961 count += expandStarColumnsInStatement(usingTable.getSubquery(), processedStmts, expandedStarCols); 3962 } 3963 } 3964 3965 // Process nested statements 3966 if (stmt.getStatements() != null) { 3967 for (int i = 0; i < stmt.getStatements().size(); i++) { 3968 Object nested = stmt.getStatements().get(i); 3969 if (nested instanceof TCustomSqlStatement) { 3970 count += expandStarColumnsInStatement((TCustomSqlStatement) nested, processedStmts, expandedStarCols); 3971 } 3972 } 3973 } 3974 3975 // Process tables with subqueries 3976 if (stmt.tables != null) { 3977 for (int i = 0; i < stmt.tables.size(); i++) { 3978 TTable table = stmt.tables.getTable(i); 3979 if (table != null && table.getSubquery() != null) { 3980 count += expandStarColumnsInStatement(table.getSubquery(), processedStmts, expandedStarCols); 3981 } 3982 } 3983 } 3984 3985 // Process CTEs 3986 if (stmt.getCteList() != null) { 3987 for (int i = 0; i < stmt.getCteList().size(); i++) { 3988 TCTE cte = stmt.getCteList().getCTE(i); 3989 if (cte != null && cte.getSubquery() != null) { 3990 count += expandStarColumnsInStatement(cte.getSubquery(), processedStmts, expandedStarCols); 3991 } 3992 } 3993 } 3994 3995 return count; 3996 } 3997 3998 /** 3999 * Expand star columns in a SELECT statement's result column list. 
4000 */ 4001 private int expandStarColumnsInSelect(TSelectSqlStatement select, Map<String, ArrayList<TAttributeNode>> expandedStarCols) { 4002 if (select == null || select.getResultColumnList() == null) return 0; 4003 4004 int count = 0; 4005 TResultColumnList resultCols = select.getResultColumnList(); 4006 4007 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4008 logInfo("expandStarColumnsInSelect: Processing SELECT with " + resultCols.size() + " result columns"); 4009 } 4010 4011 for (int i = 0; i < resultCols.size(); i++) { 4012 TResultColumn rc = resultCols.getResultColumn(i); 4013 if (rc == null || rc.getExpr() == null) continue; 4014 4015 TObjectName objName = rc.getExpr().getObjectOperand(); 4016 if (objName == null) continue; 4017 4018 String colStr = objName.toString(); 4019 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE && colStr != null) { 4020 logInfo("expandStarColumnsInSelect: Column " + i + ": " + colStr); 4021 } 4022 if (colStr == null || !colStr.endsWith("*")) continue; 4023 4024 // This is a star column - expand it 4025 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4026 logInfo("expandStarColumnsInSelect: Found star column: " + colStr); 4027 } 4028 count += expandSingleStarColumn(objName, select, colStr, rc); 4029 4030 // Track the expanded attrs for syncing to column references 4031 ArrayList<TAttributeNode> attrList = objName.getAttributeNodesDerivedFromFromClause(); 4032 if (attrList != null && !attrList.isEmpty()) { 4033 expandedStarCols.put(colStr, attrList); 4034 } 4035 } 4036 4037 return count; 4038 } 4039 4040 /** 4041 * Expand a single star column using push-down inferred columns. 
4042 * 4043 * @param starColumn The star column TObjectName (e.g., "*" or "src.*") 4044 * @param select The containing SELECT statement 4045 * @param colStr The string representation of the star column 4046 * @param resultColumn The TResultColumn containing the star (for EXCEPT column list) 4047 * @return Number of columns added 4048 */ 4049 private int expandSingleStarColumn(TObjectName starColumn, TSelectSqlStatement select, String colStr, TResultColumn resultColumn) { 4050 ArrayList<TAttributeNode> attrList = starColumn.getAttributeNodesDerivedFromFromClause(); 4051 4052 // Skip if already expanded 4053 if (!attrList.isEmpty()) { 4054 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4055 logInfo("expandSingleStarColumn: " + colStr + " already expanded with " + attrList.size() + " attrs"); 4056 } 4057 return 0; 4058 } 4059 4060 // Collect EXCEPT column names to exclude from expansion 4061 // (BigQuery: SELECT * EXCEPT (col1, col2) FROM ...) 4062 Set<String> exceptColumns = new HashSet<>(); 4063 if (resultColumn != null) { 4064 TObjectNameList exceptList = resultColumn.getExceptColumnList(); 4065 if (exceptList != null && exceptList.size() > 0) { 4066 for (int i = 0; i < exceptList.size(); i++) { 4067 TObjectName exceptCol = exceptList.getObjectName(i); 4068 if (exceptCol != null) { 4069 String exceptName = exceptCol.getColumnNameOnly(); 4070 if (exceptName != null && !exceptName.isEmpty()) { 4071 exceptColumns.add(exceptName.toUpperCase()); 4072 } 4073 } 4074 } 4075 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4076 logInfo("expandSingleStarColumn: Found " + exceptColumns.size() + 4077 " EXCEPT columns: " + exceptColumns); 4078 } 4079 } 4080 } 4081 4082 int count = 0; 4083 boolean isQualified = colStr.contains(".") && !colStr.equals("*"); 4084 4085 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4086 logInfo("expandSingleStarColumn: " + colStr + " isQualified=" + isQualified); 4087 } 4088 4089 if (isQualified) { 4090 // Qualified star (e.g., "src.*") - find the 
specific table/namespace 4091 String tablePrefix = colStr.substring(0, colStr.lastIndexOf('.')); 4092 count += expandQualifiedStar(starColumn, select, tablePrefix, attrList, exceptColumns); 4093 } else { 4094 // Unqualified star (*) - expand from all tables in FROM clause 4095 count += expandUnqualifiedStar(starColumn, select, attrList, exceptColumns); 4096 } 4097 4098 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4099 logInfo("expandSingleStarColumn: " + colStr + " expanded to " + count + " columns"); 4100 } 4101 4102 return count; 4103 } 4104 4105 /** 4106 * Expand a qualified star column (e.g., "src.*") using namespace inferred columns. 4107 * 4108 * @param starColumn The star column TObjectName 4109 * @param select The containing SELECT statement 4110 * @param tablePrefix The table prefix (e.g., "src" from "src.*") 4111 * @param attrList The list to add expanded attributes to 4112 * @param exceptColumns Column names to exclude (from EXCEPT clause), uppercase 4113 */ 4114 private int expandQualifiedStar(TObjectName starColumn, TSelectSqlStatement select, 4115 String tablePrefix, ArrayList<TAttributeNode> attrList, 4116 Set<String> exceptColumns) { 4117 int count = 0; 4118 4119 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4120 logInfo("expandQualifiedStar: tablePrefix=" + tablePrefix + 4121 ", exceptColumns=" + (exceptColumns != null ? exceptColumns : "none")); 4122 } 4123 4124 // Find the source table by alias or name 4125 TTable sourceTable = findTableByPrefixInSelect(select, tablePrefix); 4126 if (sourceTable == null) { 4127 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4128 logInfo("expandQualifiedStar: No source table found for " + tablePrefix); 4129 } 4130 // Fall back to just adding the qualified star attribute 4131 TAttributeNode.addNodeToList( 4132 new TAttributeNode(tablePrefix + ".*", null), 4133 attrList 4134 ); 4135 return 0; 4136 } 4137 4138 // Collect inferred columns from multiple sources: 4139 // 1. 
The table's own namespace (TableNamespace) 4140 // 2. If the SELECT is a CTE definition, the CTE's namespace 4141 // 3. If the SELECT is a subquery, the containing scope's namespace 4142 Set<String> allInferredCols = new HashSet<>(); 4143 4144 // Source 1: Get namespace for this table 4145 INamespace tableNamespace = scopeBuildResult != null 4146 ? scopeBuildResult.getNamespaceForTable(sourceTable) 4147 : null; 4148 4149 if (tableNamespace != null) { 4150 Set<String> inferredCols = tableNamespace.getInferredColumns(); 4151 if (inferredCols != null) { 4152 allInferredCols.addAll(inferredCols); 4153 } 4154 } 4155 4156 // Source 2: Check if this SELECT is part of a CTE definition 4157 // If so, the CTE namespace may have inferred columns from outer queries 4158 Set<String> cteInferredCols = getInferredColumnsFromContainingCTE(select); 4159 if (cteInferredCols != null) { 4160 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4161 logInfo("expandQualifiedStar: Adding " + cteInferredCols.size() + 4162 " CTE inferred columns for " + tablePrefix); 4163 } 4164 allInferredCols.addAll(cteInferredCols); 4165 } 4166 4167 // Source 3: Check the SELECT's output scope for inferred columns 4168 // IMPORTANT: For qualified star columns (like ta.*), only use scope-level inferred columns 4169 // if they actually exist in this table's namespace. Otherwise we'd incorrectly add columns 4170 // from other tables in the FROM clause to this star's expanded attributes. 4171 IScope selectScope = scopeBuildResult != null 4172 ? 
scopeBuildResult.getScopeForStatement(select) 4173 : null; 4174 if (selectScope != null) { 4175 Set<String> scopeInferredCols = getInferredColumnsFromScope(selectScope); 4176 if (scopeInferredCols != null && tableNamespace != null) { 4177 // Only add scope-level inferred columns that actually exist in this table's namespace 4178 // This prevents columns from other tables being incorrectly associated with this star 4179 Map<String, ColumnSource> columnSources = tableNamespace.getAllColumnSources(); 4180 Set<String> tableInferredCols = tableNamespace.getInferredColumns(); 4181 for (String scopeCol : scopeInferredCols) { 4182 // Check if this column can be resolved within this table's namespace 4183 boolean hasInNamespace = (columnSources != null && columnSources.containsKey(scopeCol)) || 4184 (tableInferredCols != null && tableInferredCols.contains(scopeCol)); 4185 if (hasInNamespace) { 4186 allInferredCols.add(scopeCol); 4187 } 4188 } 4189 } else if (scopeInferredCols != null && tableNamespace == null) { 4190 // No table namespace - add all scope columns (fallback for edge cases) 4191 allInferredCols.addAll(scopeInferredCols); 4192 } 4193 } 4194 4195 if (!allInferredCols.isEmpty()) { 4196 // Expand using inferred columns, filtering out EXCEPT columns 4197 for (String colName : allInferredCols) { 4198 // Skip columns in EXCEPT clause 4199 if (exceptColumns != null && !exceptColumns.isEmpty() && 4200 exceptColumns.contains(colName.toUpperCase())) { 4201 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4202 logInfo("expandQualifiedStar: Skipping EXCEPT column: " + colName); 4203 } 4204 continue; 4205 } 4206 String attrName = tablePrefix + "." 
+ colName; 4207 TAttributeNode.addNodeToList( 4208 new TAttributeNode(attrName, sourceTable), 4209 attrList 4210 ); 4211 count++; 4212 } 4213 } else if (tableNamespace != null) { 4214 // No inferred columns - try to get from namespace's column sources 4215 Map<String, ColumnSource> columnSources = tableNamespace.getAllColumnSources(); 4216 if (columnSources != null && !columnSources.isEmpty()) { 4217 for (String colName : columnSources.keySet()) { 4218 // Skip columns in EXCEPT clause 4219 if (exceptColumns != null && !exceptColumns.isEmpty() && 4220 exceptColumns.contains(colName.toUpperCase())) { 4221 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4222 logInfo("expandQualifiedStar: Skipping EXCEPT column from sources: " + colName); 4223 } 4224 continue; 4225 } 4226 String attrName = tablePrefix + "." + colName; 4227 TAttributeNode.addNodeToList( 4228 new TAttributeNode(attrName, sourceTable), 4229 attrList 4230 ); 4231 count++; 4232 } 4233 } 4234 } 4235 4236 // If no columns were added, add the star as fallback 4237 if (count == 0) { 4238 TAttributeNode.addNodeToList( 4239 new TAttributeNode(tablePrefix + ".*", sourceTable), 4240 attrList 4241 ); 4242 } 4243 4244 return count; 4245 } 4246 4247 /** 4248 * Get inferred columns from a CTE that contains the given SELECT statement. 4249 * Used for push-down: when outer queries reference columns from a CTE, 4250 * those columns are inferred in the CTE's namespace and should be used 4251 * to expand star columns in the CTE's SELECT. 
4252 */ 4253 private Set<String> getInferredColumnsFromContainingCTE(TSelectSqlStatement select) { 4254 if (select == null || scopeBuildResult == null || namespaceEnhancer == null) { 4255 return null; 4256 } 4257 4258 // Find the CTE that defines this SELECT 4259 Set<INamespace> starNamespaces = namespaceEnhancer.getStarNamespaces(); 4260 if (starNamespaces == null) { 4261 return null; 4262 } 4263 4264 for (INamespace ns : starNamespaces) { 4265 if (ns instanceof CTENamespace) { 4266 CTENamespace cteNs = (CTENamespace) ns; 4267 TSelectSqlStatement cteSelect = cteNs.getSelectStatement(); 4268 // Check both by reference and by start token position 4269 if (cteSelect == select || 4270 (cteSelect != null && select != null && 4271 cteSelect.getStartToken() != null && select.getStartToken() != null && 4272 cteSelect.getStartToken().posinlist == select.getStartToken().posinlist)) { 4273 Set<String> inferredCols = cteNs.getInferredColumns(); 4274 if (inferredCols != null && !inferredCols.isEmpty()) { 4275 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4276 logInfo("getInferredColumnsFromContainingCTE: Found CTE " + cteNs.getDisplayName() + 4277 " with " + inferredCols.size() + " inferred columns"); 4278 } 4279 return inferredCols; 4280 } 4281 } 4282 } else if (ns instanceof SubqueryNamespace) { 4283 SubqueryNamespace subNs = (SubqueryNamespace) ns; 4284 TSelectSqlStatement subSelect = subNs.getSelectStatement(); 4285 if (subSelect == select || 4286 (subSelect != null && select != null && 4287 subSelect.getStartToken() != null && select.getStartToken() != null && 4288 subSelect.getStartToken().posinlist == select.getStartToken().posinlist)) { 4289 Set<String> inferredCols = subNs.getInferredColumns(); 4290 if (inferredCols != null && !inferredCols.isEmpty()) { 4291 if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 4292 logInfo("getInferredColumnsFromContainingCTE: Found Subquery with " + 4293 inferredCols.size() + " inferred columns"); 4294 } 4295 return inferredCols; 4296 } 
4297 } 4298 } 4299 } 4300 4301 return null; 4302 } 4303 4304 /** 4305 * Get inferred columns from namespaces in a scope's FROM clause. 4306 */ 4307 private Set<String> getInferredColumnsFromScope(IScope scope) { 4308 if (scope == null) { 4309 return null; 4310 } 4311 4312 Set<String> result = new HashSet<>(); 4313 4314 // Check all namespaces in the scope's children 4315 for (gudusoft.gsqlparser.resolver2.model.ScopeChild child : scope.getChildren()) { 4316 INamespace ns = child.getNamespace(); 4317 if (ns != null) { 4318 Set<String> inferredCols = ns.getInferredColumns(); 4319 if (inferredCols != null) { 4320 result.addAll(inferredCols); 4321 } 4322 } 4323 } 4324 4325 return result.isEmpty() ? null : result; 4326 } 4327 4328 /** 4329 * Expand an unqualified star column (*) using all tables in FROM clause. 4330 * 4331 * @param starColumn The star column TObjectName 4332 * @param select The containing SELECT statement 4333 * @param attrList The list to add expanded attributes to 4334 * @param exceptColumns Column names to exclude (from EXCEPT clause), uppercase 4335 */ 4336 private int expandUnqualifiedStar(TObjectName starColumn, TSelectSqlStatement select, 4337 ArrayList<TAttributeNode> attrList, Set<String> exceptColumns) { 4338 int count = 0; 4339 4340 if (select.tables == null) return 0; 4341 4342 for (int i = 0; i < select.tables.size(); i++) { 4343 TTable table = select.tables.getTable(i); 4344 if (table == null) continue; 4345 4346 // Skip certain table types 4347 if (table.getTableType() == ETableSource.join) continue; 4348 4349 String tablePrefix = table.getAliasName(); 4350 if (tablePrefix == null || tablePrefix.isEmpty()) { 4351 tablePrefix = table.getName(); 4352 } 4353 if (tablePrefix == null) continue; 4354 4355 // Get namespace for this table 4356 INamespace namespace = scopeBuildResult != null 4357 ? 
scopeBuildResult.getNamespaceForTable(table) 4358 : null; 4359 4360 if (namespace != null) { 4361 Set<String> inferredCols = namespace.getInferredColumns(); 4362 4363 if (inferredCols != null && !inferredCols.isEmpty()) { 4364 for (String colName : inferredCols) { 4365 // Skip columns in EXCEPT clause 4366 if (exceptColumns != null && !exceptColumns.isEmpty() && 4367 exceptColumns.contains(colName.toUpperCase())) { 4368 continue; 4369 } 4370 String attrName = tablePrefix + "." + colName; 4371 TAttributeNode.addNodeToList( 4372 new TAttributeNode(attrName, table), 4373 attrList 4374 ); 4375 count++; 4376 } 4377 } else { 4378 Map<String, ColumnSource> columnSources = namespace.getAllColumnSources(); 4379 if (columnSources != null && !columnSources.isEmpty()) { 4380 for (String colName : columnSources.keySet()) { 4381 // Skip columns in EXCEPT clause 4382 if (exceptColumns != null && !exceptColumns.isEmpty() && 4383 exceptColumns.contains(colName.toUpperCase())) { 4384 continue; 4385 } 4386 String attrName = tablePrefix + "." + colName; 4387 TAttributeNode.addNodeToList( 4388 new TAttributeNode(attrName, table), 4389 attrList 4390 ); 4391 count++; 4392 } 4393 } 4394 } 4395 } 4396 4397 // If no columns for this table, add the star as fallback 4398 if (count == 0 || (namespace != null && namespace.getInferredColumns().isEmpty() 4399 && namespace.getAllColumnSources().isEmpty())) { 4400 TAttributeNode.addNodeToList( 4401 new TAttributeNode(tablePrefix + ".*", table), 4402 attrList 4403 ); 4404 } 4405 } 4406 4407 return count; 4408 } 4409 4410 /** 4411 * Find a table by its prefix (alias or name) in a SELECT statement. 
4412 */ 4413 private TTable findTableByPrefixInSelect(TSelectSqlStatement select, String prefix) { 4414 if (select == null || select.tables == null || prefix == null) return null; 4415 4416 // Normalize prefix (remove backticks, quotes, schema prefix for comparison) 4417 String normalizedPrefix = normalizeTablePrefix(prefix); 4418 4419 for (int i = 0; i < select.tables.size(); i++) { 4420 TTable table = select.tables.getTable(i); 4421 if (table == null) continue; 4422 4423 // Check alias first 4424 String alias = table.getAliasName(); 4425 if (alias != null && normalizeTablePrefix(alias).equalsIgnoreCase(normalizedPrefix)) { 4426 return table; 4427 } 4428 4429 // Check table name 4430 String name = table.getName(); 4431 if (name != null && normalizeTablePrefix(name).equalsIgnoreCase(normalizedPrefix)) { 4432 return table; 4433 } 4434 4435 // Check full table name (with schema) 4436 if (table.getTableName() != null) { 4437 String fullName = table.getTableName().toString(); 4438 if (fullName != null && normalizeTablePrefix(fullName).equalsIgnoreCase(normalizedPrefix)) { 4439 return table; 4440 } 4441 } 4442 } 4443 4444 return null; 4445 } 4446 4447 /** 4448 * Normalize table prefix for comparison (remove quotes, backticks). 
4449 */ 4450 private String normalizeTablePrefix(String prefix) { 4451 if (prefix == null) return ""; 4452 String result = prefix.trim(); 4453 // Remove backticks 4454 if (result.startsWith("`") && result.endsWith("`")) { 4455 result = result.substring(1, result.length() - 1); 4456 } 4457 // Remove double quotes 4458 if (result.startsWith("\"") && result.endsWith("\"")) { 4459 result = result.substring(1, result.length() - 1); 4460 } 4461 // Remove brackets 4462 if (result.startsWith("[") && result.endsWith("]")) { 4463 result = result.substring(1, result.length() - 1); 4464 } 4465 return result; 4466 } 4467 4468 /** 4469 * Get resolution statistics 4470 */ 4471 public ResolutionStatistics getStatistics() { 4472 return resolutionContext.getStatistics(); 4473 } 4474 4475 /** 4476 * Get the resolution context (for advanced queries) 4477 */ 4478 public ResolutionContext getContext() { 4479 return resolutionContext; 4480 } 4481 4482 /** 4483 * Get the global scope 4484 */ 4485 public GlobalScope getGlobalScope() { 4486 return globalScope; 4487 } 4488 4489 /** 4490 * Get the configuration 4491 */ 4492 public TSQLResolverConfig getConfig() { 4493 return config; 4494 } 4495 4496 /** 4497 * Get the pass history (for iterative resolution analysis) 4498 * 4499 * @return list of all resolution passes (empty if non-iterative or not yet resolved) 4500 */ 4501 public List<ResolutionPass> getPassHistory() { 4502 return new ArrayList<>(passHistory); 4503 } 4504 4505 /** 4506 * Get the convergence detector (for iterative resolution analysis) 4507 * 4508 * @return convergence detector (null if iterative resolution is disabled) 4509 */ 4510 public ConvergenceDetector getConvergenceDetector() { 4511 return convergenceDetector; 4512 } 4513 4514 /** 4515 * Get the scope build result (for testing and analysis) 4516 * 4517 * @return scope build result from ScopeBuilder (null if not yet resolved) 4518 */ 4519 public ScopeBuildResult getScopeBuildResult() { 4520 return scopeBuildResult; 
4521 } 4522 4523 /** 4524 * Get the resolution result access interface. 4525 * This provides a clean, statement-centric API for accessing resolution results. 4526 * 4527 * <p>Usage example:</p> 4528 * <pre> 4529 * TSQLResolver2 resolver = new TSQLResolver2(null, parser.sqlstatements); 4530 * resolver.resolve(); 4531 * 4532 * IResolutionResult result = resolver.getResult(); 4533 * 4534 * for (TCustomSqlStatement stmt : parser.sqlstatements) { 4535 * for (TTable table : result.getTables(stmt)) { 4536 * System.out.println("Table: " + table.getFullName()); 4537 * for (TObjectName col : result.getColumnsForTable(stmt, table)) { 4538 * System.out.println(" Column: " + col.getColumnNameOnly()); 4539 * } 4540 * } 4541 * } 4542 * </pre> 4543 * 4544 * @return resolution result access interface 4545 * @throws IllegalStateException if resolve() has not been called 4546 */ 4547 public IResolutionResult getResult() { 4548 if (scopeBuildResult == null) { 4549 throw new IllegalStateException( 4550 "Must call resolve() before getResult()"); 4551 } 4552 return new ResolutionResultImpl(scopeBuildResult, sqlStatements); 4553 } 4554 4555 // ===== Binding Diagnostic API (plan §5.3, S2 stub / S5 wired) ===== 4556 4557 /** 4558 * Aggregate binding result populated by {@link BindingDiagnosticPostPass} 4559 * after iterative resolution converges (S5). Always non-null — including 4560 * when binding flags are off, when {@code resolve()} has not been called, 4561 * or when {@link gudusoft.gsqlparser.TGSqlParser#parse()} returned a 4562 * syntax error (plan §5.6, §12). 4563 * 4564 * @return the binding result; never null 4565 */ 4566 public BindingResult getBindingResult() { 4567 return bindingResult != null ? bindingResult : BindingResult.empty(); 4568 } 4569 4570 /** 4571 * Convenience accessor for {@link #getBindingResult()}{@code .getDiagnostics()}. 
4572 * 4573 * @return the diagnostics list; never null 4574 */ 4575 public List<BindingDiagnostic> getBindingDiagnostics() { 4576 return getBindingResult().getDiagnostics(); 4577 } 4578 4579 /** 4580 * Convenience accessor for {@link #getBindingResult()}{@code .hasErrors()}. 4581 * 4582 * @return whether any ERROR-severity binding diagnostic was emitted 4583 */ 4584 public boolean hasBindingErrors() { 4585 return getBindingResult().hasErrors(); 4586 } 4587 4588 // ===== Star Column Reverse Inference Support (Principle 3) ===== 4589 4590 /** 4591 * Star Column push-down context for reverse inference. 4592 * Tracks which columns should be added to which Namespaces based on 4593 * outer layer references. 4594 */ 4595 private static class StarPushDownContext { 4596 /** Namespace -> (ColumnName -> Confidence) */ 4597 private final Map<INamespace, Map<String, Double>> pushDownMap = new HashMap<>(); 4598 4599 /** 4600 * Record that a column should be added to a namespace. 4601 * If the same column is pushed multiple times, keep the highest confidence. 4602 */ 4603 public void pushColumn(INamespace namespace, String columnName, double confidence) { 4604 Map<String, Double> columns = pushDownMap.computeIfAbsent(namespace, k -> new HashMap<>()); 4605 columns.put(columnName, Math.max(confidence, columns.getOrDefault(columnName, 0.0))); 4606 } 4607 4608 /** 4609 * Get all columns that should be pushed to each namespace. 4610 */ 4611 public Map<INamespace, java.util.Set<String>> getAllPushDownColumns() { 4612 Map<INamespace, java.util.Set<String>> result = new HashMap<>(); 4613 for (Map.Entry<INamespace, Map<String, Double>> entry : pushDownMap.entrySet()) { 4614 result.put(entry.getKey(), entry.getValue().keySet()); 4615 } 4616 return result; 4617 } 4618 4619 /** 4620 * Get the confidence score for a specific column in a namespace. 
4621 */ 4622 public double getConfidence(INamespace namespace, String columnName) { 4623 return pushDownMap.getOrDefault(namespace, java.util.Collections.emptyMap()) 4624 .getOrDefault(columnName, 0.0); 4625 } 4626 4627 /** 4628 * Get the total number of columns to be pushed down across all namespaces. 4629 */ 4630 public int getTotalPushedColumns() { 4631 return pushDownMap.values().stream() 4632 .mapToInt(Map::size) 4633 .sum(); 4634 } 4635 } 4636 4637 /** 4638 * Represents a star column source (CTE or subquery with SELECT *). 4639 * Used for reverse inference to track which columns are required from the star. 4640 */ 4641 private static class StarColumnSource { 4642 private final String name; // CTE name or subquery alias 4643 private final INamespace namespace; // The namespace for this source 4644 private final INamespace underlyingTableNamespace; // Namespace of the table behind SELECT * 4645 private final java.util.Set<String> requiredColumns = new java.util.HashSet<>(); 4646 4647 public StarColumnSource(String name, INamespace namespace, INamespace underlyingTableNamespace) { 4648 this.name = name; 4649 this.namespace = namespace; 4650 this.underlyingTableNamespace = underlyingTableNamespace; 4651 } 4652 4653 public String getName() { 4654 return name; 4655 } 4656 4657 public INamespace getNamespace() { 4658 return namespace; 4659 } 4660 4661 public void addRequiredColumn(String columnName) { 4662 requiredColumns.add(columnName); 4663 } 4664 4665 public java.util.Set<String> getRequiredColumns() { 4666 return requiredColumns; 4667 } 4668 4669 public boolean hasUnderlyingTable() { 4670 return underlyingTableNamespace != null; 4671 } 4672 4673 public INamespace getUnderlyingTableNamespace() { 4674 return underlyingTableNamespace; 4675 } 4676 4677 @Override 4678 public String toString() { 4679 return String.format("StarColumnSource[%s, required=%d]", name, requiredColumns.size()); 4680 } 4681 } 4682 4683 /** 4684 * Collect all star column sources (CTEs and 
subqueries with SELECT *). 4685 * Traverses the scope tree to find CTENamespace and SubqueryNamespace 4686 * that use SELECT * in their subqueries. 4687 */ 4688 private List<StarColumnSource> collectAllStarColumnSources() { 4689 List<StarColumnSource> sources = new ArrayList<>(); 4690 4691 // Traverse global scope tree 4692 if (globalScope != null) { 4693 collectStarSourcesFromScope(globalScope, sources); 4694 } 4695 4696 // Also traverse UPDATE scopes (for Teradata UPDATE...FROM syntax) 4697 if (scopeBuilder != null) { 4698 for (UpdateScope updateScope : scopeBuilder.getUpdateScopeMap().values()) { 4699 collectStarSourcesFromScope(updateScope, sources); 4700 } 4701 for (DeleteScope deleteScope : scopeBuilder.getDeleteScopeMap().values()) { 4702 collectStarSourcesFromScope(deleteScope, sources); 4703 } 4704 } 4705 4706 logDebug("Collected " + sources.size() + " star column sources"); 4707 return sources; 4708 } 4709 4710 /** 4711 * Recursively collect star column sources from a scope and its children. 4712 */ 4713 private void collectStarSourcesFromScope(IScope scope, List<StarColumnSource> sources) { 4714 // Check all child namespaces in this scope 4715 for (gudusoft.gsqlparser.resolver2.model.ScopeChild child : scope.getChildren()) { 4716 INamespace namespace = child.getNamespace(); 4717 4718 // Use the new interface method to check for star columns 4719 if (namespace.hasStarColumn()) { 4720 TSelectSqlStatement selectStmt = namespace.getSelectStatement(); 4721 INamespace underlyingNs = selectStmt != null ? 
getFirstTableNamespace(selectStmt) : null; 4722 4723 StarColumnSource starSource = new StarColumnSource( 4724 namespace.getDisplayName(), 4725 namespace, 4726 underlyingNs 4727 ); 4728 sources.add(starSource); 4729 4730 logDebug("Found star source: " + namespace.getDisplayName()); 4731 } 4732 } 4733 4734 // Recursively traverse child scopes based on scope type 4735 if (scope instanceof SelectScope) { 4736 SelectScope selectScope = (SelectScope) scope; 4737 if (selectScope.getFromScope() != null) { 4738 collectStarSourcesFromScope(selectScope.getFromScope(), sources); 4739 } 4740 } else if (scope instanceof UpdateScope) { 4741 UpdateScope updateScope = (UpdateScope) scope; 4742 if (updateScope.getFromScope() != null) { 4743 collectStarSourcesFromScope(updateScope.getFromScope(), sources); 4744 } 4745 } else if (scope instanceof DeleteScope) { 4746 DeleteScope deleteScope = (DeleteScope) scope; 4747 if (deleteScope.getFromScope() != null) { 4748 collectStarSourcesFromScope(deleteScope.getFromScope(), sources); 4749 } 4750 } 4751 } 4752 4753 4754 /** 4755 * Get the first table namespace from a SELECT statement's FROM clause. 4756 * Returns the DynamicStarSource if available. 4757 */ 4758 private INamespace getFirstTableNamespace(TSelectSqlStatement select) { 4759 if (select == null || select.tables == null || select.tables.size() == 0) { 4760 return null; 4761 } 4762 4763 // Get first table 4764 TTable firstTable = select.tables.getTable(0); 4765 String tableName = firstTable.getAliasName() != null 4766 ? firstTable.getAliasName() 4767 : firstTable.getName(); 4768 4769 // Search for corresponding namespace in all dynamic namespaces 4770 List<INamespace> dynamicNamespaces = getAllDynamicNamespaces(); 4771 for (INamespace ns : dynamicNamespaces) { 4772 if (ns.getDisplayName().equals(tableName)) { 4773 return ns; 4774 } 4775 } 4776 4777 return null; 4778 } 4779 4780 /** 4781 * Collect all outer references to a star column source. 
4782 * Searches through allColumnReferences for columns that reference this star source. 4783 */ 4784 private List<TObjectName> collectOuterReferencesToSource(StarColumnSource starSource) { 4785 List<TObjectName> references = new ArrayList<>(); 4786 4787 if (starSource == null || starSource.getName() == null) { 4788 return references; 4789 } 4790 4791 String sourceName = starSource.getName(); 4792 4793 // Search through all collected column references 4794 for (TObjectName objName : allColumnReferences) { 4795 if (objName == null) { 4796 continue; 4797 } 4798 4799 // Check if this column reference is from the star source 4800 // E.g., for CTE named "my_cte", check if objName is like "my_cte.col1" 4801 String tableQualifier = getTableQualifier(objName); 4802 4803 if (tableQualifier != null && tableQualifier.equalsIgnoreCase(sourceName)) { 4804 references.add(objName); 4805 logDebug("Found outer reference: " + objName + " -> " + sourceName); 4806 } 4807 } 4808 4809 logDebug("Collected " + references.size() + " outer references for: " + sourceName); 4810 return references; 4811 } 4812 4813 /** 4814 * Get the table qualifier from a TObjectName. 
4815 * E.g., for "schema.table.column", returns "table" 4816 * E.g., for "table.column", returns "table" 4817 * E.g., for "column", returns null 4818 */ 4819 private String getTableQualifier(TObjectName objName) { 4820 if (objName == null) { 4821 return null; 4822 } 4823 4824 // TObjectName has parts like: [schema, table, column] 4825 // or [table, column] 4826 // or [column] 4827 4828 // If there are 3 or more parts, the second-to-last is the table 4829 // If there are 2 parts, the first is the table 4830 // If there is 1 part, there's no table qualifier 4831 4832 String fullName = objName.toString(); 4833 String[] parts = fullName.split("\\."); 4834 4835 if (parts.length >= 3) { 4836 // schema.table.column -> return table 4837 return parts[parts.length - 2]; 4838 } else if (parts.length == 2) { 4839 // table.column -> return table 4840 return parts[0]; 4841 } else { 4842 // Just column name, no qualifier 4843 return null; 4844 } 4845 } 4846 4847 /** 4848 * Get all DynamicStarSource namespaces from the scope tree. 4849 * This is used to apply inference results to namespaces that need enhancement. 4850 */ 4851 private List<INamespace> getAllDynamicNamespaces() { 4852 List<INamespace> result = new ArrayList<>(); 4853 4854 // Collect from global scope tree 4855 if (globalScope != null) { 4856 collectDynamicNamespacesFromScope(globalScope, result); 4857 } 4858 4859 return result; 4860 } 4861 4862 /** 4863 * Recursively collect DynamicStarSource namespaces from a scope and its children. 
*/
    private void collectDynamicNamespacesFromScope(IScope scope, List<INamespace> result) {
        if (scope == null) {
            return;
        }

        // Record every direct child namespace that is a DynamicStarSource.
        for (gudusoft.gsqlparser.resolver2.model.ScopeChild child : scope.getChildren()) {
            INamespace namespace = child.getNamespace();
            if (namespace instanceof gudusoft.gsqlparser.resolver2.namespace.DynamicStarSource) {
                result.add(namespace);
                logDebug("Found DynamicStarSource: " + namespace.getDisplayName());
            }
        }

        // Only a SELECT scope is descended into, via its FROM scope. CTE, FROM,
        // GROUP BY, HAVING and ORDER BY scopes contribute their direct children
        // above and nothing more.
        // NOTE(review): scope trees nested inside SubqueryNamespace children are
        // not traversed here, and UPDATE/DELETE FROM scopes are not descended
        // either - confirm that is intended.
        if (scope instanceof SelectScope) {
            IScope fromScope = ((SelectScope) scope).getFromScope();
            if (fromScope != null) {
                collectDynamicNamespacesFromScope(fromScope, result);
            }
        }
    }

    // ===== Logging helpers =====

    /** Logs {@code message} at INFO level, prefixed with the resolver tag. */
    private void logInfo(String message) {
        TBaseType.log("[TSQLResolver2] " + message, TLog.INFO);
    }

    /** Logs {@code message} at DEBUG level, prefixed with the resolver tag. */
    private void logDebug(String message) {
        TBaseType.log("[TSQLResolver2] " + message, TLog.DEBUG);
    }

    /** Logs {@code message} at ERROR level, prefixed with the resolver tag. */
    private void logError(String message) {
        TBaseType.log("[TSQLResolver2] " + message, TLog.ERROR);
    }
}