package gudusoft.gsqlparser.resolver2.enhancement;

import gudusoft.gsqlparser.TBaseType;
import gudusoft.gsqlparser.nodes.TCTE;
import gudusoft.gsqlparser.nodes.TObjectName;
import gudusoft.gsqlparser.nodes.TTable;
import gudusoft.gsqlparser.resolver2.ResolutionStatus;
import gudusoft.gsqlparser.resolver2.ScopeBuildResult;
import gudusoft.gsqlparser.resolver2.inference.EvidenceType;
import gudusoft.gsqlparser.resolver2.model.ResolutionEvidence;
import gudusoft.gsqlparser.resolver2.model.ResolutionResult;
import gudusoft.gsqlparser.resolver2.namespace.CTENamespace;
import gudusoft.gsqlparser.resolver2.namespace.INamespace;
import gudusoft.gsqlparser.resolver2.namespace.SubqueryNamespace;
import gudusoft.gsqlparser.resolver2.namespace.UnionNamespace;
import gudusoft.gsqlparser.resolver2.scope.IScope;
import gudusoft.gsqlparser.resolver2.scope.SelectScope;
import gudusoft.gsqlparser.stmt.TSelectSqlStatement;

import java.util.*;

/**
 * Handles explicit namespace enhancement between resolution passes.
 *
 * The enhancement process:
 * 1. During resolution, collect column references that target namespaces with star columns
 * 2. Between passes, explicitly add these columns to the namespaces
 * 3. Track which columns were added in which pass for debugging
 *
 * This makes the column inference process explicit and traceable, rather than
 * happening implicitly during resolution.
 *
 * Typical call order is {@link #initialize(ScopeBuildResult)} once, then for every pass
 * {@link #startPass(int)}, any number of {@code collect...} calls, and {@link #enhance()}.
 * {@link #enhance()} may be called without a prior {@link #initialize(ScopeBuildResult)};
 * in that case the pending references are applied but no propagation takes place.
 */
public class NamespaceEnhancer {

    /** Upper bound on fixpoint iterations when propagating columns (safety limit against cycles). */
    private static final int MAX_PROPAGATION_ITERATIONS = 10;

    /** Confidence passed to {@link ResolutionEvidence} for columns added by propagation. */
    private static final double PROPAGATION_CONFIDENCE = 0.8;

    /** Collected column references pending enhancement */
    private final List<CollectedColumnRef> pendingRefs;

    /** History of all enhancement results */
    private final List<EnhancementResult> enhancementHistory;

    /** Current pass number */
    private int currentPass;

    /** All namespaces that have star columns (cached); {@code null} until {@link #initialize} is called */
    private Set<INamespace> starNamespaces;

    public NamespaceEnhancer() {
        this(false);
    }

    /**
     * @param debug kept for API compatibility but no longer used
     */
    public NamespaceEnhancer(boolean debug) {
        this.pendingRefs = new ArrayList<>();
        this.enhancementHistory = new ArrayList<>();
        this.currentPass = 0;
        this.starNamespaces = null;
    }

    /**
     * Initialize the enhancer with scope build result.
     * Caches the namespaces that have star columns. Any previously cached set is discarded.
     *
     * @param scopeBuildResult result whose statement scope map is scanned for star namespaces
     */
    public void initialize(ScopeBuildResult scopeBuildResult) {
        // Insertion-ordered so that propagation and reporting follow discovery order
        // instead of hash order.
        this.starNamespaces = new LinkedHashSet<>();

        // Collect from all scopes in the statement scope map.
        // This is more reliable than traversing from GlobalScope since the scope tree
        // may have different structures for different SQL constructs.
        for (IScope scope : scopeBuildResult.getStatementScopeMap().values()) {
            collectStarNamespacesInternal(scope, new HashSet<>());
        }
    }

    /**
     * Collect star namespaces reachable from one scope: the namespaces attached to the
     * scope's own children and, for a {@link SelectScope}, those attached to the children
     * of its FROM scope. Child scopes are not descended into; nesting is followed on the
     * namespace side by {@link #collectFromNamespace(INamespace, Set)}.
     *
     * @param scope   the scope to inspect; ignored when {@code null} or already visited
     * @param visited scopes already handled with this set
     */
    private void collectStarNamespacesInternal(IScope scope, Set<IScope> visited) {
        if (scope == null || visited.contains(scope)) {
            return;
        }
        visited.add(scope);

        if (scope instanceof SelectScope) {
            IScope fromScope = ((SelectScope) scope).getFromScope();
            if (fromScope != null && !visited.contains(fromScope)) {
                visited.add(fromScope);
                collectFromScopeChildren(fromScope);
            }
        }

        collectFromScopeChildren(scope);
    }

    /** Collect star namespaces from every child of {@code scope} that carries a namespace. */
    private void collectFromScopeChildren(IScope scope) {
        for (gudusoft.gsqlparser.resolver2.model.ScopeChild child : scope.getChildren()) {
            INamespace childNs = child.getNamespace();
            if (childNs != null) {
                collectFromNamespace(childNs, new HashSet<>());
            }
        }
    }

    /**
     * Recursively collect namespaces with star columns from a namespace and its nested namespaces
     * (the union namespace of a CTE, and the branch namespaces of a union).
     *
     * @param namespace the namespace to inspect; ignored when {@code null} or already visited
     * @param visitedNs namespaces already handled in this traversal (guards against cycles)
     */
    private void collectFromNamespace(INamespace namespace, Set<INamespace> visitedNs) {
        if (namespace == null || visitedNs.contains(namespace)) {
            return;
        }
        visitedNs.add(namespace);

        if (namespace.hasStarColumn()) {
            starNamespaces.add(namespace);
        }

        // A CTE may wrap a UNION; look inside it as well
        if (namespace instanceof CTENamespace) {
            UnionNamespace unionNs = ((CTENamespace) namespace).getUnionNamespace();
            if (unionNs != null) {
                collectFromNamespace(unionNs, visitedNs);
            }
        }

        // A UNION contributes each of its branches
        if (namespace instanceof UnionNamespace) {
            for (SubqueryNamespace branchNs : ((UnionNamespace) namespace).getBranchNamespaces()) {
                collectFromNamespace(branchNs, visitedNs);
            }
        }
    }

    /**
     * Start a new resolution pass.
     * Clears pending references from previous pass.
     *
     * @param passNumber number recorded on references collected from now on
     */
    public void startPass(int passNumber) {
        this.currentPass = passNumber;
        this.pendingRefs.clear();
    }

    /**
     * Collect a column reference that could not be resolved.
     * Called during resolution when a column targets a star namespace.
     * The reference is ignored when the column name is null/empty, when the target
     * namespace is null, or when the target neither has a star column nor supports
     * dynamic inference.
     *
     * @param columnName The column name
     * @param targetNamespace The namespace the column should belong to
     * @param sourceReference The AST node
     * @param confidence Confidence score
     * @param evidence Reason for this inference
     *
     * @deprecated Use {@link #collectColumnRef(String, INamespace, TObjectName, ResolutionEvidence)} instead
     */
    @Deprecated
    public void collectColumnRef(String columnName,
                                 INamespace targetNamespace,
                                 TObjectName sourceReference,
                                 double confidence,
                                 String evidence) {
        if (!isCollectable(columnName, targetNamespace)) {
            return;
        }
        addPending(new CollectedColumnRef(
                columnName,
                targetNamespace,
                sourceReference,
                currentPass,
                confidence,
                evidence));
    }

    /**
     * Collect a column reference with structured evidence.
     * Called during resolution when a column targets a star namespace.
     * The reference is ignored when the column name is null/empty, when the target
     * namespace is null, or when the target neither has a star column nor supports
     * dynamic inference.
     *
     * @param columnName The column name
     * @param targetNamespace The namespace the column should belong to
     * @param sourceReference The AST node
     * @param evidenceDetail Structured evidence detail
     */
    public void collectColumnRef(String columnName,
                                 INamespace targetNamespace,
                                 TObjectName sourceReference,
                                 ResolutionEvidence evidenceDetail) {
        if (!isCollectable(columnName, targetNamespace)) {
            return;
        }
        addPending(new CollectedColumnRef(
                columnName,
                targetNamespace,
                sourceReference,
                currentPass,
                evidenceDetail));
    }

    /**
     * Shared admission check for both {@code collectColumnRef} overloads: the column name
     * must be non-empty and the namespace must either have a star column or support
     * dynamic inference.
     */
    private static boolean isCollectable(String columnName, INamespace targetNamespace) {
        if (columnName == null || columnName.isEmpty()) {
            return false;
        }
        return targetNamespace != null
                && (targetNamespace.hasStarColumn() || targetNamespace.supportsDynamicInference());
    }

    /** Queue a reference unless an equal one is already pending. */
    private void addPending(CollectedColumnRef ref) {
        if (!pendingRefs.contains(ref)) {
            pendingRefs.add(ref);
        }
    }

    /**
     * Collect a column reference based on resolution result.
     * A reference is collected only when the column was NOT_FOUND and the candidate
     * namespace has a star column; every other status needs no action here.
     *
     * @param column             the column reference that was resolved
     * @param result             outcome of resolving {@code column}
     * @param candidateNamespace namespace the column may belong to; may be {@code null}
     */
    public void collectFromResolution(TObjectName column,
                                      ResolutionResult result,
                                      INamespace candidateNamespace) {
        if (result == null || column == null) {
            return;
        }

        String columnName = column.getColumnNameOnly();
        if (columnName == null || columnName.isEmpty()) {
            return;
        }

        if (result.getStatus() != ResolutionStatus.NOT_FOUND) {
            // Resolved columns (e.g. EXACT_MATCH through a star namespace) are already tracked
            return;
        }
        if (candidateNamespace == null || !candidateNamespace.hasStarColumn()) {
            return;
        }

        ResolutionEvidence evidence = ResolutionEvidence.fromStarInference(
                columnName,
                candidateNamespace.getDisplayName(),
                false, // unqualified reference
                column
        );
        collectColumnRef(columnName, candidateNamespace, column, evidence);
    }

    /**
     * Perform namespace enhancement using collected column references.
     * This is called BETWEEN resolution passes. The pending references are consumed:
     * after this call {@link #getPendingCount()} is zero and the result has been
     * appended to the enhancement history.
     *
     * @return EnhancementResult with details of what was added
     */
    public EnhancementResult enhance() {
        EnhancementResult result = new EnhancementResult(currentPass);

        for (CollectedColumnRef ref : pendingRefs) {
            INamespace namespace = ref.getTargetNamespace();
            String columnName = ref.getColumnName();

            // Use structured evidence if available, otherwise fall back to legacy parameters
            ResolutionEvidence detail = ref.getEvidenceDetail();
            boolean added;
            if (detail != null) {
                added = namespace.addInferredColumn(
                        columnName,
                        detail.getWeight(),
                        detail.toLegacyEvidence());
            } else {
                added = namespace.addInferredColumn(
                        columnName,
                        ref.getConfidence(),
                        ref.getEvidence());
            }

            if (added) {
                result.recordAdded(namespace, columnName, ref);
            } else {
                result.recordSkipped(namespace, columnName, ref);
            }
        }

        // Propagate inferred columns through CTE chains
        // This handles cases like: WITH cte2 AS (SELECT * FROM cte1) where cte1 also has SELECT *
        // Columns pushed to cte2 should also be pushed to cte1
        propagateThroughCTEChains(result);

        enhancementHistory.add(result);
        pendingRefs.clear();

        return result;
    }

    /**
     * Propagate inferred columns through CTE chains.
     *
     * When a CTE references another CTE with star columns (e.g., WITH cte2 AS (SELECT * FROM cte1)),
     * any columns inferred in cte2 should also be pushed to cte1.
     *
     * Also propagates from SubqueryNamespace to referenced CTEs:
     * When a subquery like (SELECT * FROM cte1) has inferred columns,
     * those columns should propagate to cte1.
     *
     * This method uses the actual CTENamespace instances from starNamespaces (not new instances),
     * ensuring that changes persist to the scope tree.
     *
     * Does nothing when the enhancer has not been initialized or when no star CTE is cached.
     * Iteration stops once a full sweep adds nothing, or after
     * {@link #MAX_PROPAGATION_ITERATIONS} sweeps.
     */
    private void propagateThroughCTEChains(EnhancementResult result) {
        // enhance() may legitimately run before initialize(); nothing to propagate then
        if (starNamespaces == null || starNamespaces.isEmpty()) {
            return;
        }

        Map<TCTE, CTENamespace> cteMap = buildCTEMap();
        if (cteMap.isEmpty()) {
            return;
        }

        boolean changed;
        int iterations = 0;
        do {
            changed = false;
            iterations++;

            for (INamespace ns : starNamespaces) {
                // Non-short-circuit |= on purpose: every namespace is visited in each sweep
                if (ns instanceof CTENamespace) {
                    changed |= propagateToReferencedCTEs((CTENamespace) ns, cteMap, result);
                } else if (ns instanceof UnionNamespace) {
                    changed |= propagateFromUnionToCTEs((UnionNamespace) ns, cteMap, result);
                } else if (ns instanceof SubqueryNamespace) {
                    changed |= propagateFromSubqueryToCTEs((SubqueryNamespace) ns, cteMap, result);
                }
            }
        } while (changed && iterations < MAX_PROPAGATION_ITERATIONS);
    }

    /**
     * Build a map of TCTE -> CTENamespace from the actual instances in starNamespaces.
     * CTE namespaces without a TCTE node are left out.
     */
    private Map<TCTE, CTENamespace> buildCTEMap() {
        Map<TCTE, CTENamespace> map = new HashMap<>();
        for (INamespace ns : starNamespaces) {
            if (ns instanceof CTENamespace) {
                CTENamespace cteNs = (CTENamespace) ns;
                if (cteNs.getCTE() != null) {
                    map.put(cteNs.getCTE(), cteNs);
                }
            }
        }
        return map;
    }

    /**
     * Propagate inferred columns from a CTENamespace to CTEs and subqueries it references.
     *
     * This handles:
     * 1. CTE references (WITH cte AS (SELECT * FROM other_cte))
     * 2. Subquery references (WITH cte AS (SELECT * FROM (SELECT * FROM t)))
     * 3. UNION subqueries (WITH cte AS (SELECT * FROM (SELECT ... UNION ALL SELECT ...)))
     *
     * @param source The CTENamespace with inferred columns
     * @param cteMap Map of TCTE to CTENamespace
     * @param result EnhancementResult to record changes
     * @return true if any columns were added
     */
    private boolean propagateToReferencedCTEs(CTENamespace source,
                                              Map<TCTE, CTENamespace> cteMap,
                                              EnhancementResult result) {
        Set<String> inferredCols = source.getInferredColumns();
        if (inferredCols.isEmpty()) {
            return false;
        }
        return propagateToTableSources(source, source.getSelectStatement(), inferredCols, cteMap, result,
                "CTE chain propagation",
                "CTE to subquery propagation");
    }

    /**
     * Find the namespace in starNamespaces that corresponds to a given TSelectSqlStatement.
     * This is used to look up the actual namespace instance for a subquery. Statements are
     * compared by identity, and only SubqueryNamespace and UnionNamespace entries are considered.
     *
     * @param stmt The TSelectSqlStatement to find
     * @return The corresponding namespace, or null if not found
     */
    private INamespace findNamespaceForStatement(TSelectSqlStatement stmt) {
        if (stmt == null) {
            return null;
        }

        for (INamespace ns : starNamespaces) {
            if (ns instanceof SubqueryNamespace) {
                if (((SubqueryNamespace) ns).getSelectStatement() == stmt) {
                    return ns;
                }
            } else if (ns instanceof UnionNamespace) {
                if (((UnionNamespace) ns).getSelectStatement() == stmt) {
                    return ns;
                }
            }
        }
        return null;
    }

    /**
     * Walk the tables of one SELECT statement and push {@code inferredCols} into every
     * star namespace they refer to: a CTE reference is looked up in {@code cteMap}, a
     * subquery table via {@link #findNamespaceForStatement(TSelectSqlStatement)}.
     * Shared by the CTE, subquery and UNION propagation entry points.
     *
     * @param source                 namespace the columns originate from (used for evidence text)
     * @param stmt                   statement whose tables are inspected; may be {@code null}
     * @param inferredCols           column names to propagate
     * @param cteMap                 Map of TCTE to CTENamespace
     * @param result                 EnhancementResult to record changes
     * @param cteEvidencePrefix      evidence prefix used when the target is a CTE
     * @param subqueryEvidencePrefix evidence prefix used when the target is a subquery
     * @return true if any columns were added
     */
    private boolean propagateToTableSources(INamespace source,
                                            TSelectSqlStatement stmt,
                                            Set<String> inferredCols,
                                            Map<TCTE, CTENamespace> cteMap,
                                            EnhancementResult result,
                                            String cteEvidencePrefix,
                                            String subqueryEvidencePrefix) {
        if (stmt == null || stmt.tables == null) {
            return false;
        }

        boolean changed = false;
        for (int i = 0; i < stmt.tables.size(); i++) {
            TTable table = stmt.tables.getTable(i);
            if (table == null) {
                continue;
            }

            INamespace targetNs;
            String evidencePrefix;
            if (table.isCTEName() && table.getCTE() != null) {
                // Case 1: CTE reference - look up the actual CTENamespace instance
                targetNs = cteMap.get(table.getCTE());
                evidencePrefix = cteEvidencePrefix;
            } else if (table.getSubquery() != null) {
                // Case 2: Subquery table - find the namespace in starNamespaces
                targetNs = findNamespaceForStatement(table.getSubquery());
                evidencePrefix = subqueryEvidencePrefix;
            } else {
                continue;
            }

            if (targetNs != null && targetNs.hasStarColumn()) {
                changed |= propagateColumnsToNamespace(source, targetNs, inferredCols, result, evidencePrefix);
            }
        }
        return changed;
    }

    /**
     * Propagate a set of columns from source namespace to target namespace.
     *
     * @param source The source namespace with inferred columns
     * @param target The target namespace to propagate to
     * @param columns The column names to propagate
     * @param result EnhancementResult to record changes
     * @param evidencePrefix Prefix for evidence string
     * @return true if any columns were added
     */
    private boolean propagateColumnsToNamespace(INamespace source,
                                                INamespace target,
                                                Set<String> columns,
                                                EnhancementResult result,
                                                String evidencePrefix) {
        if (columns.isEmpty()) {
            return false;
        }

        // The evidence does not depend on the column, so build it once
        ResolutionEvidence evidence = new ResolutionEvidence(
                EvidenceType.INFERRED_FROM_USAGE,
                PROPAGATION_CONFIDENCE,
                evidencePrefix + " from " + source.getDisplayName() + " to " + target.getDisplayName(),
                (gudusoft.gsqlparser.nodes.TParseTreeNode) null
        );
        double weight = evidence.getWeight();
        String legacyEvidence = evidence.toLegacyEvidence();

        boolean changed = false;
        // Iterate a snapshot: `columns` may be a live view of a namespace's inferred set
        // (not verifiable from this file), and targets are mutated inside the loop.
        for (String colName : new ArrayList<>(columns)) {
            if (target.addInferredColumn(colName, weight, legacyEvidence)) {
                changed = true;
                // No CollectedColumnRef exists for a propagated column
                result.recordAdded(target, colName, null);
            }
        }
        return changed;
    }

    /**
     * Propagate inferred columns from a SubqueryNamespace to CTEs and subqueries it references.
     *
     * When a subquery has SELECT * FROM cte and the subquery has inferred columns,
     * those columns should propagate to the CTE so the CTE's star columns can be expanded.
     *
     * @param source The SubqueryNamespace with inferred columns
     * @param cteMap Map of TCTE to CTENamespace
     * @param result EnhancementResult to record changes
     * @return true if any columns were added
     */
    private boolean propagateFromSubqueryToCTEs(SubqueryNamespace source,
                                                Map<TCTE, CTENamespace> cteMap,
                                                EnhancementResult result) {
        Set<String> inferredCols = source.getInferredColumns();
        if (inferredCols.isEmpty()) {
            return false;
        }
        return propagateToTableSources(source, source.getSelectStatement(), inferredCols, cteMap, result,
                "Subquery to CTE propagation",
                "Subquery to subquery propagation");
    }

    /**
     * Propagate inferred columns from a UnionNamespace to CTEs and subqueries referenced in its branches.
     *
     * UNION branches can reference CTEs or subqueries. When columns are inferred in the UNION,
     * they should propagate to all branch sources.
     *
     * @param source The UnionNamespace with inferred columns
     * @param cteMap Map of TCTE to CTENamespace
     * @param result EnhancementResult to record changes
     * @return true if any columns were added
     */
    private boolean propagateFromUnionToCTEs(UnionNamespace source,
                                             Map<TCTE, CTENamespace> cteMap,
                                             EnhancementResult result) {
        Set<String> inferredCols = source.getInferredColumns();
        if (inferredCols.isEmpty()) {
            return false;
        }

        boolean changed = false;
        for (SubqueryNamespace branchNs : source.getBranchNamespaces()) {
            // The UNION itself (not the branch) is reported as the source of the columns
            changed |= propagateToTableSources(source, branchNs.getSelectStatement(), inferredCols, cteMap, result,
                    "UNION branch to CTE propagation",
                    "UNION branch to subquery propagation");
        }
        return changed;
    }

    /**
     * Get all enhancement results history
     *
     * @return read-only view of the results, one per {@link #enhance()} call, oldest first
     */
    public List<EnhancementResult> getEnhancementHistory() {
        return Collections.unmodifiableList(enhancementHistory);
    }

    /**
     * Get total columns added across all passes
     */
    public int getTotalColumnsAdded() {
        return enhancementHistory.stream()
                .mapToInt(EnhancementResult::getTotalAdded)
                .sum();
    }

    /**
     * Get number of pending references (not yet enhanced)
     */
    public int getPendingCount() {
        return pendingRefs.size();
    }

    /**
     * Check if a namespace is a star namespace
     *
     * @return false when the enhancer has not been initialized
     */
    public boolean isStarNamespace(INamespace namespace) {
        return starNamespaces != null && starNamespaces.contains(namespace);
    }

    /**
     * Get all star namespaces
     *
     * @return read-only view of the cached star namespaces; empty when not initialized
     */
    public Set<INamespace> getStarNamespaces() {
        return starNamespaces != null
                ? Collections.unmodifiableSet(starNamespaces)
                : Collections.emptySet();
    }

    /**
     * Generate a summary report of all enhancements
     */
    public String generateReport() {
        StringBuilder sb = new StringBuilder();
        sb.append("=== Namespace Enhancement Summary ===\n");
        sb.append("Total passes: ").append(enhancementHistory.size()).append("\n");
        sb.append("Total columns added: ").append(getTotalColumnsAdded()).append("\n");
        sb.append("Star namespaces: ").append(starNamespaces != null ? starNamespaces.size() : 0).append("\n\n");

        for (EnhancementResult result : enhancementHistory) {
            sb.append(result.toDetailedReport());
        }

        return sb.toString();
    }
}