001package gudusoft.gsqlparser.resolver2.namespace; 002 003import gudusoft.gsqlparser.nodes.TCTE; 004import gudusoft.gsqlparser.nodes.TObjectName; 005import gudusoft.gsqlparser.nodes.TResultColumn; 006import gudusoft.gsqlparser.nodes.TResultColumnList; 007import gudusoft.gsqlparser.nodes.TTable; 008import gudusoft.gsqlparser.resolver2.ColumnLevel; 009import gudusoft.gsqlparser.resolver2.matcher.INameMatcher; 010import gudusoft.gsqlparser.resolver2.model.ColumnSource; 011import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 012 013import java.util.ArrayList; 014import java.util.Collections; 015import java.util.HashSet; 016import java.util.LinkedHashMap; 017import java.util.List; 018import java.util.Map; 019import java.util.Set; 020 021/** 022 * Namespace representing a Common Table Expression (CTE). 023 * Similar to SubqueryNamespace but handles CTE-specific features: 024 * - Explicit column list: WITH cte(c1, c2) AS (SELECT ...) 025 * - Recursive CTEs 026 * - Multiple references within same query 027 * - UNION subqueries: columns are pushed through to all UNION branches 028 * 029 * Example: 030 * WITH my_cte(id, name) AS ( 031 * SELECT user_id, user_name FROM users 032 * ) 033 * SELECT id, name FROM my_cte; 034 */ 035public class CTENamespace extends AbstractNamespace { 036 037 private final TCTE cte; 038 private final String cteName; 039 private final TSelectSqlStatement selectStatement; 040 041 /** CTE column list (explicit column names) */ 042 private final List<String> explicitColumns; 043 044 /** Whether this CTE is recursive */ 045 private final boolean recursive; 046 047 /** UnionNamespace if this CTE's subquery is a UNION */ 048 private UnionNamespace unionNamespace; 049 050 /** Inferred columns from star push-down */ 051 private Map<String, ColumnSource> inferredColumns; 052 053 /** Track inferred column names */ 054 private Set<String> inferredColumnNames; 055 056 /** 057 * The TTable that references this CTE in a FROM clause. 058 * Used as fallback for getFinalTable() when there's no underlying physical table. 059 * For example: WITH cte AS (SELECT 1 AS col) SELECT col FROM cte 060 * The referencing TTable is the 'cte' in the FROM clause. 061 */ 062 private TTable referencingTable; 063 064 /** 065 * Namespaces for FROM clause tables that support dynamic inference. 066 * Used to propagate inferred columns through deeply nested structures. 067 * Lazily initialized when needed. 068 * 069 * Example: WITH cte AS (SELECT * FROM (SELECT * FROM t1 UNION ALL SELECT * FROM t2) sub) 070 * The 'sub' subquery namespace is stored here for propagation. 071 */ 072 private List<INamespace> fromClauseNamespaces; 073 074 public CTENamespace(TCTE cte, 075 String cteName, 076 TSelectSqlStatement selectStatement, 077 INameMatcher nameMatcher) { 078 super(cte, nameMatcher); 079 this.cte = cte; 080 this.cteName = cteName; 081 this.selectStatement = selectStatement; 082 this.explicitColumns = extractExplicitColumns(cte); 083 this.recursive = isRecursiveCTE(cte); 084 085 // If the CTE's subquery is a UNION, create a UnionNamespace to handle it 086 if (selectStatement != null && selectStatement.isCombinedQuery()) { 087 this.unionNamespace = new UnionNamespace(selectStatement, cteName + "_union", nameMatcher); 088 } 089 } 090 091 public CTENamespace(TCTE cte, String cteName, TSelectSqlStatement selectStatement) { 092 this(cte, cteName, selectStatement, null); 093 } 094 095 @Override 096 public String getDisplayName() { 097 return cteName; 098 } 099 100 /** 101 * Get the TTable that references this CTE in a FROM clause. 102 */ 103 public TTable getReferencingTable() { 104 return referencingTable; 105 } 106 107 /** 108 * {@inheritDoc} 109 * For CTENamespace, returns the TTable that references this CTE in the query. 110 * This is the immediate source table for columns resolved through this CTE. 111 */ 112 @Override 113 public TTable getSourceTable() { 114 return referencingTable; 115 } 116 117 /** 118 * Set the TTable that references this CTE in a FROM clause. 119 * Called by ScopeBuilder when a CTE is referenced. 120 */ 121 public void setReferencingTable(TTable table) { 122 this.referencingTable = table; 123 } 124 125 @Override 126 public TTable getFinalTable() { 127 // Trace through the CTE's subquery to find the underlying physical table 128 // This is similar to SubqueryNamespace.getFinalTable() but handles CTE chains 129 130 // If this CTE has a UNION subquery, delegate to UnionNamespace 131 if (unionNamespace != null) { 132 TTable unionTable = unionNamespace.getFinalTable(); 133 if (unionTable != null) { 134 return unionTable; 135 } 136 // Fallback to referencing table if UNION has no physical tables 137 return referencingTable; 138 } 139 140 // If no tables in the CTE's SELECT, return the referencing table 141 // This handles CTEs like: WITH cte AS (SELECT 1 AS col) 142 if (selectStatement == null || selectStatement.tables == null || selectStatement.tables.size() == 0) { 143 return referencingTable; 144 } 145 146 // Check for qualified star column (e.g., CTE_NAME.*) first 147 TTable qualifiedStarTable = findTableFromQualifiedStar(); 148 if (qualifiedStarTable != null) { 149 return qualifiedStarTable; 150 } 151 152 // For single-table CTEs, trace to the underlying table 153 TTable firstTable = selectStatement.tables.getTable(0); 154 if (firstTable == null) { 155 return null; 156 } 157 158 // If it's a physical table (not a CTE reference), return it 159 if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !firstTable.isCTEName()) { 160 return firstTable; 161 } 162 163 // If it's a CTE reference, trace through the CTE chain 164 if (firstTable.isCTEName() && firstTable.getCTE() != null) { 165 return traceTableThroughCTE(firstTable.getCTE()); 166 } 167 168 // If it's a subquery, trace through it 169 if (firstTable.getSubquery() != null) { 170 SubqueryNamespace nestedNs = new SubqueryNamespace( 171 firstTable.getSubquery(), 172 firstTable.getAliasName(), 173 nameMatcher 174 ); 175 nestedNs.validate(); 176 TTable subTable = nestedNs.getFinalTable(); 177 if (subTable != null) { 178 return subTable; 179 } 180 } 181 182 // If it's a join, get the first base table 183 if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.join) { 184 TTable joinTable = findFirstPhysicalTableFromJoin(firstTable); 185 if (joinTable != null) { 186 return joinTable; 187 } 188 } 189 190 // Fallback: return the referencing TTable (the CTE reference in FROM clause) 191 // This is used when the CTE doesn't have underlying physical tables, 192 // e.g., WITH cte AS (SELECT 1 AS col) - the columns are literals, not from tables 193 return referencingTable; 194 } 195 196 /** 197 * Find the table referenced by a qualified star column in this CTE's SELECT list. 198 * Example: SELECT other_cte.* FROM other_cte -> traces to other_cte's underlying table 199 */ 200 private TTable findTableFromQualifiedStar() { 201 if (selectStatement == null || selectStatement.getResultColumnList() == null) { 202 return null; 203 } 204 205 TResultColumnList selectList = selectStatement.getResultColumnList(); 206 for (int i = 0; i < selectList.size(); i++) { 207 TResultColumn resultCol = selectList.getResultColumn(i); 208 if (resultCol == null) continue; 209 210 String colStr = resultCol.toString().trim(); 211 // Check if it's a qualified star (contains . before *) 212 if (colStr.endsWith("*") && colStr.contains(".")) { 213 int dotIndex = colStr.lastIndexOf('.'); 214 if (dotIndex > 0) { 215 String tablePrefix = colStr.substring(0, dotIndex).trim(); 216 // Find the table with this alias or name 217 TTable matchingTable = findTableByAliasOrName(tablePrefix); 218 if (matchingTable != null) { 219 // If the matching table is a CTE reference, trace through it 220 if (matchingTable.isCTEName() && matchingTable.getCTE() != null) { 221 return traceTableThroughCTE(matchingTable.getCTE()); 222 } 223 // If it's a subquery, trace through it 224 if (matchingTable.getSubquery() != null) { 225 SubqueryNamespace nestedNs = new SubqueryNamespace( 226 matchingTable.getSubquery(), 227 matchingTable.getAliasName(), 228 nameMatcher 229 ); 230 nestedNs.validate(); 231 return nestedNs.getFinalTable(); 232 } 233 // If it's a physical table, return it 234 if (matchingTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !matchingTable.isCTEName()) { 235 return matchingTable; 236 } 237 } 238 } 239 } 240 } 241 return null; 242 } 243 244 /** 245 * Find a table in the FROM clause by alias or name. 246 */ 247 private TTable findTableByAliasOrName(String nameOrAlias) { 248 if (selectStatement == null || selectStatement.tables == null) { 249 return null; 250 } 251 252 for (int i = 0; i < selectStatement.tables.size(); i++) { 253 TTable table = selectStatement.tables.getTable(i); 254 if (table == null) continue; 255 256 // Check alias 257 String alias = table.getAliasName(); 258 if (alias != null && nameMatcher.matches(alias, nameOrAlias)) { 259 return table; 260 } 261 262 // Check table name 263 if (table.getTableName() != null && nameMatcher.matches(table.getTableName().toString(), nameOrAlias)) { 264 return table; 265 } 266 } 267 return null; 268 } 269 270 /** 271 * Trace through a CTE to find its underlying physical table. 272 * This handles CTE chains like: CTE1 -> CTE2 -> CTE3 -> physical_table 273 */ 274 private TTable traceTableThroughCTE(TCTE cteNode) { 275 return traceTableThroughCTE(cteNode, new HashSet<TCTE>()); 276 } 277 278 private TTable traceTableThroughCTE(TCTE cteNode, java.util.Set<TCTE> visited) { 279 if (cteNode == null || cteNode.getSubquery() == null) { 280 return null; 281 } 282 283 // Detect circular CTE references 284 if (!visited.add(cteNode)) { 285 return null; 286 } 287 288 TSelectSqlStatement cteSubquery = cteNode.getSubquery(); 289 290 // Handle UNION in the CTE 291 if (cteSubquery.isCombinedQuery()) { 292 // For UNION, trace the left branch 293 TSelectSqlStatement leftStmt = cteSubquery.getLeftStmt(); 294 if (leftStmt != null && leftStmt.tables != null && leftStmt.tables.size() > 0) { 295 cteSubquery = leftStmt; 296 } 297 } 298 299 if (cteSubquery.tables == null || cteSubquery.tables.size() == 0) { 300 return null; 301 } 302 303 TTable firstTable = cteSubquery.tables.getTable(0); 304 if (firstTable == null) { 305 return null; 306 } 307 308 // If it's a physical table (not CTE), we found it 309 if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !firstTable.isCTEName()) { 310 return firstTable; 311 } 312 313 // If it's another CTE reference, continue tracing 314 if (firstTable.isCTEName() && firstTable.getCTE() != null) { 315 return traceTableThroughCTE(firstTable.getCTE(), visited); 316 } 317 318 // If it's a subquery, trace through it 319 if (firstTable.getSubquery() != null) { 320 SubqueryNamespace nestedNs = new SubqueryNamespace( 321 firstTable.getSubquery(), 322 firstTable.getAliasName(), 323 nameMatcher 324 ); 325 nestedNs.validate(); 326 return nestedNs.getFinalTable(); 327 } 328 329 // If it's a join, get the first base table 330 if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.join) { 331 return findFirstPhysicalTableFromJoin(firstTable); 332 } 333 334 return null; 335 } 336 337 /** 338 * Find the first physical table from a JOIN expression. 339 */ 340 private TTable findFirstPhysicalTableFromJoin(TTable joinTable) { 341 if (joinTable == null || joinTable.getJoinExpr() == null) { 342 return null; 343 } 344 345 gudusoft.gsqlparser.nodes.TJoinExpr joinExpr = joinTable.getJoinExpr(); 346 347 // Check left side first 348 TTable leftTable = joinExpr.getLeftTable(); 349 if (leftTable != null) { 350 if (leftTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !leftTable.isCTEName()) { 351 return leftTable; 352 } 353 if (leftTable.isCTEName() && leftTable.getCTE() != null) { 354 TTable traced = traceTableThroughCTE(leftTable.getCTE()); 355 if (traced != null) return traced; 356 } 357 if (leftTable.getSubquery() != null) { 358 SubqueryNamespace nestedNs = new SubqueryNamespace( 359 leftTable.getSubquery(), 360 leftTable.getAliasName(), 361 nameMatcher 362 ); 363 nestedNs.validate(); 364 return nestedNs.getFinalTable(); 365 } 366 if (leftTable.getTableType() == gudusoft.gsqlparser.ETableSource.join) { 367 return findFirstPhysicalTableFromJoin(leftTable); 368 } 369 } 370 371 // Check right side 372 TTable rightTable = joinExpr.getRightTable(); 373 if (rightTable != null) { 374 if (rightTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !rightTable.isCTEName()) { 375 return rightTable; 376 } 377 if (rightTable.isCTEName() && rightTable.getCTE() != null) { 378 return traceTableThroughCTE(rightTable.getCTE()); 379 } 380 } 381 382 return null; 383 } 384 385 @Override 386 public List<TTable> getAllFinalTables() { 387 // If this CTE has a UNION subquery, delegate to the UnionNamespace 388 if (unionNamespace != null) { 389 return unionNamespace.getAllFinalTables(); 390 } 391 392 // Check if this CTE references another CTE (which might be a UNION) 393 if (selectStatement != null && selectStatement.tables != null && selectStatement.tables.size() > 0) { 394 TTable firstTable = selectStatement.tables.getTable(0); 395 if (firstTable != null && firstTable.isCTEName() && firstTable.getCTE() != null) { 396 TCTE referencedCTE = firstTable.getCTE(); 397 if (referencedCTE.getSubquery() != null) { 398 // Create a namespace for the referenced CTE to get its tables 399 CTENamespace referencedNs = new CTENamespace( 400 referencedCTE, 401 referencedCTE.getTableName() != null ? referencedCTE.getTableName().toString() : "cte", 402 referencedCTE.getSubquery(), 403 nameMatcher 404 ); 405 referencedNs.validate(); 406 // This will trace through the CTE chain to get all tables 407 // including from UNION branches 408 return referencedNs.getAllFinalTables(); 409 } 410 } 411 } 412 413 // For non-UNION, non-CTE-reference CTEs, return the single final table 414 TTable finalTable = getFinalTable(); 415 if (finalTable != null) { 416 return Collections.singletonList(finalTable); 417 } 418 419 return Collections.emptyList(); 420 } 421 422 @Override 423 protected void doValidate() { 424 columnSources = new LinkedHashMap<>(); 425 426 if (selectStatement == null || selectStatement.getResultColumnList() == null) { 427 return; 428 } 429 430 TResultColumnList selectList = selectStatement.getResultColumnList(); 431 432 // If CTE has explicit column list, use it 433 if (!explicitColumns.isEmpty()) { 434 validateWithExplicitColumns(selectList); 435 } else { 436 // No explicit columns, derive from SELECT list 437 validateWithImplicitColumns(selectList); 438 } 439 } 440 441 /** 442 * Validate CTE with explicit column list. 443 * Example: WITH cte(c1, c2, c3) AS (SELECT a, b, c FROM t) 444 * 445 * Two cases are handled: 446 * 447 * 1. **Position-based (Snowflake pattern)**: CTE explicit column list + SELECT * 448 * Example: WITH cte(c1, c2, c3) AS (SELECT * FROM Employees) 449 * - c1/c2/c3 are positional aliases for star expansion 450 * - Without metadata: c1 -> Employees.*, c2 -> Employees.*, c3 -> Employees.* 451 * - With metadata: c1 -> Employees.<col_1>, c2 -> Employees.<col_2>, etc. 452 * 453 * 2. **Direct mapping**: CTE explicit column list + named columns 454 * Example: WITH cte(c1, c2) AS (SELECT id, name FROM t) 455 * - c1 -> t.id, c2 -> t.name (1:1 positional mapping) 456 * 457 * @see <a href="star_column_pushdown.md#cte-explicit-column-list--select--snowflake-case"> 458 * Documentation: CTE Explicit Column List + SELECT *</a> 459 */ 460 private void validateWithExplicitColumns(TResultColumnList selectList) { 461 // Check if the SELECT list contains only star column(s) - the position-based pattern 462 StarColumnInfo starInfo = analyzeStarColumns(selectList); 463 464 if (starInfo.isSingleStar()) { 465 // Position-based case: CTE(c1,c2,c3) AS (SELECT * FROM t) 466 // The column names c1/c2/c3 are positional aliases, not real column names 467 handleExplicitColumnsWithStar(starInfo.getStarColumn(), starInfo.getStarQualifier()); 468 } else { 469 // Direct mapping case: CTE(c1,c2) AS (SELECT id, name FROM t) 470 // Each explicit column maps to corresponding SELECT list item by position 471 handleExplicitColumnsWithDirectMapping(selectList); 472 } 473 } 474 475 /** 476 * Handle CTE explicit columns when SELECT list is a star. 477 * This is the position-based (Snowflake) pattern. 478 * 479 * @param starColumn the star column (* or table.*) 480 * @param starQualifier the table qualifier if qualified star (e.g., "src" for "src.*"), or null 481 */ 482 private void handleExplicitColumnsWithStar(TResultColumn starColumn, String starQualifier) { 483 // Try ordinal mapping if metadata is available 484 List<String> ordinalColumns = tryOrdinalMapping(starQualifier); 485 486 if (ordinalColumns != null && ordinalColumns.size() >= explicitColumns.size()) { 487 // Metadata available - use ordinal mapping: c1 -> Employees.<col_1> 488 for (int i = 0; i < explicitColumns.size(); i++) { 489 String cteColName = explicitColumns.get(i); 490 String baseColName = ordinalColumns.get(i); 491 492 ColumnSource source = new ColumnSource( 493 this, 494 cteColName, 495 starColumn, // Reference to star column 496 1.0, // High confidence - ordinal mapping from metadata 497 "cte_explicit_column_ordinal:" + baseColName 498 ); 499 columnSources.put(cteColName, source); 500 } 501 } else { 502 // No metadata - fallback to star reference: c1 -> Employees.* 503 for (String colName : explicitColumns) { 504 ColumnSource source = new ColumnSource( 505 this, 506 colName, 507 starColumn, // Reference to star column 508 0.8, // Lower confidence - ordinal mapping unknown 509 "cte_explicit_column_via_star" 510 ); 511 columnSources.put(colName, source); 512 } 513 } 514 } 515 516 /** 517 * Handle CTE explicit columns with direct positional mapping to SELECT list. 518 * 519 * @param selectList the SELECT list to map from 520 */ 521 private void handleExplicitColumnsWithDirectMapping(TResultColumnList selectList) { 522 int columnCount = Math.min(explicitColumns.size(), selectList.size()); 523 524 for (int i = 0; i < columnCount; i++) { 525 String colName = explicitColumns.get(i); 526 TResultColumn resultCol = selectList.getResultColumn(i); 527 528 ColumnSource source = new ColumnSource( 529 this, 530 colName, 531 resultCol, 532 1.0, // Definite - direct positional mapping 533 "cte_explicit_column" 534 ); 535 536 columnSources.put(colName, source); 537 } 538 } 539 540 /** 541 * Try to get ordered column names from metadata for ordinal mapping. 542 * 543 * @param starQualifier the table qualifier (e.g., "src"), or null for unqualified star 544 * @return ordered list of column names from metadata, or null if not available 545 */ 546 private List<String> tryOrdinalMapping(String starQualifier) { 547 // TODO: When metadata (TSQLEnv/DDL) is available, return ordered column list 548 // For now, return null to use the fallback (star reference) 549 // 550 // Future implementation: 551 // 1. Find the source table namespace by starQualifier 552 // 2. Get its column sources (which use LinkedHashMap for insertion order) 553 // 3. Return the column names in order 554 return null; 555 } 556 557 /** 558 * Analyze star columns in the SELECT list. 559 * Determines if the SELECT is a single star column pattern. 560 */ 561 private StarColumnInfo analyzeStarColumns(TResultColumnList selectList) { 562 if (selectList == null || selectList.size() == 0) { 563 return new StarColumnInfo(); 564 } 565 566 // Check for single star column pattern 567 if (selectList.size() == 1) { 568 TResultColumn rc = selectList.getResultColumn(0); 569 if (isStarColumn(rc)) { 570 String qualifier = getStarQualifier(rc); 571 return new StarColumnInfo(rc, qualifier); 572 } 573 } 574 575 return new StarColumnInfo(); 576 } 577 578 /** 579 * Check if a result column is a star column (* or table.*) 580 */ 581 private boolean isStarColumn(TResultColumn rc) { 582 if (rc == null) { 583 return false; 584 } 585 String str = rc.toString(); 586 return str != null && (str.equals("*") || str.endsWith(".*")); 587 } 588 589 /** 590 * Get the qualifier from a qualified star (src.* returns "src") 591 */ 592 private String getStarQualifier(TResultColumn rc) { 593 if (rc == null) { 594 return null; 595 } 596 String str = rc.toString(); 597 if (str != null && str.endsWith(".*") && str.length() > 2) { 598 return str.substring(0, str.length() - 2); 599 } 600 return null; 601 } 602 603 /** 604 * Helper class to hold star column analysis results. 605 */ 606 private static class StarColumnInfo { 607 private final TResultColumn starColumn; 608 private final String starQualifier; 609 610 StarColumnInfo() { 611 this.starColumn = null; 612 this.starQualifier = null; 613 } 614 615 StarColumnInfo(TResultColumn starColumn, String starQualifier) { 616 this.starColumn = starColumn; 617 this.starQualifier = starQualifier; 618 } 619 620 boolean isSingleStar() { 621 return starColumn != null; 622 } 623 624 TResultColumn getStarColumn() { 625 return starColumn; 626 } 627 628 String getStarQualifier() { 629 return starQualifier; 630 } 631 } 632 633 /** 634 * Validate CTE without explicit column list. 635 * Example: WITH cte AS (SELECT id, name FROM users) 636 */ 637 private void validateWithImplicitColumns(TResultColumnList selectList) { 638 for (int i = 0; i < selectList.size(); i++) { 639 TResultColumn resultCol = selectList.getResultColumn(i); 640 641 // Determine column name 642 String colName = getColumnName(resultCol); 643 if (colName == null) { 644 colName = "col_" + (i + 1); 645 } 646 647 // Create column source 648 ColumnSource source = new ColumnSource( 649 this, 650 colName, 651 resultCol, 652 1.0, // Definite - from SELECT list 653 "cte_implicit_column" 654 ); 655 656 columnSources.put(colName, source); 657 } 658 } 659 660 /** 661 * Extract column name from TResultColumn 662 */ 663 private String getColumnName(TResultColumn resultCol) { 664 // Check for alias 665 if (resultCol.getAliasClause() != null && 666 resultCol.getAliasClause().getAliasName() != null) { 667 return resultCol.getAliasClause().getAliasName().toString(); 668 } 669 670 // Check for simple column reference 671 if (resultCol.getExpr() != null) { 672 gudusoft.gsqlparser.nodes.TExpression expr = resultCol.getExpr(); 673 if (expr.getExpressionType() == gudusoft.gsqlparser.EExpressionType.simple_object_name_t) { 674 TObjectName objName = expr.getObjectOperand(); 675 if (objName != null) { 676 return objName.getColumnNameOnly(); 677 } 678 } 679 } 680 681 return null; 682 } 683 684 /** 685 * Extract explicit column list from CTE 686 */ 687 private List<String> extractExplicitColumns(TCTE cte) { 688 List<String> columns = new ArrayList<>(); 689 690 if (cte != null && cte.getColumnList() != null) { 691 for (int i = 0; i < cte.getColumnList().size(); i++) { 692 TObjectName colName = cte.getColumnList().getObjectName(i); 693 if (colName != null) { 694 columns.add(colName.toString()); 695 } 696 } 697 } 698 699 return columns; 700 } 701 702 /** 703 * Check if this is a recursive CTE 704 */ 705 private boolean isRecursiveCTE(TCTE cte) { 706 if (cte == null) { 707 return false; 708 } 709 return cte.isRecursive(); 710 } 711 712 public TCTE getCTE() { 713 return cte; 714 } 715 716 @Override 717 public TSelectSqlStatement getSelectStatement() { 718 return selectStatement; 719 } 720 721 @Override 722 public boolean hasStarColumn() { 723 // If this CTE has a UNION subquery, delegate to the UnionNamespace 724 if (unionNamespace != null) { 725 return unionNamespace.hasStarColumn(); 726 } 727 728 if (selectStatement == null || selectStatement.getResultColumnList() == null) { 729 return false; 730 } 731 732 TResultColumnList selectList = selectStatement.getResultColumnList(); 733 for (int i = 0; i < selectList.size(); i++) { 734 TResultColumn resultCol = selectList.getResultColumn(i); 735 if (resultCol != null && resultCol.toString().endsWith("*")) { 736 return true; 737 } 738 } 739 return false; 740 } 741 742 @Override 743 public boolean supportsDynamicInference() { 744 return hasStarColumn(); 745 } 746 747 @Override 748 public boolean addInferredColumn(String columnName, double confidence, String evidence) { 749 if (columnName == null || columnName.isEmpty()) { 750 return false; 751 } 752 753 // Initialize maps if needed 754 if (inferredColumns == null) { 755 inferredColumns = new LinkedHashMap<>(); 756 } 757 if (inferredColumnNames == null) { 758 inferredColumnNames = new HashSet<>(); 759 } 760 761 // Check if already exists in explicit columns 762 if (columnSources != null && columnSources.containsKey(columnName)) { 763 return false; 764 } 765 766 // Check if already inferred 767 if (inferredColumns.containsKey(columnName)) { 768 return false; 769 } 770 771 // Collect candidate tables - get ALL final tables from the CTE chain 772 // This handles both UNION CTEs and CTEs that reference other CTEs 773 java.util.List<TTable> candidateTables = new java.util.ArrayList<>(); 774 775 // Get all final tables from this CTE's namespace (handles UNION and CTE chains) 776 java.util.List<TTable> allTables = this.getAllFinalTables(); 777 for (TTable table : allTables) { 778 if (table != null && !candidateTables.contains(table)) { 779 candidateTables.add(table); 780 } 781 } 782 783 // Create inferred column source WITH candidate tables if applicable 784 ColumnSource source = new ColumnSource( 785 this, 786 columnName, 787 null, 788 confidence, 789 evidence, 790 null, // overrideTable 791 (candidateTables != null && !candidateTables.isEmpty()) ? candidateTables : null 792 ); 793 794 inferredColumns.put(columnName, source); 795 inferredColumnNames.add(columnName); 796 797 // Propagate to nested namespaces if this CTE has SELECT * from subqueries/unions 798 propagateToNestedNamespaces(columnName, confidence, evidence); 799 800 // NOTE: Propagation to referenced CTEs (CTE chains like cte2 -> cte1) is handled 801 // by NamespaceEnhancer.propagateThroughCTEChains() which has access to the actual 802 // namespace instances from the scope tree. We don't do it here because creating 803 // new CTENamespace instances would not affect the actual instances used for resolution. 804 805 return true; 806 } 807 808 /** 809 * Propagate an inferred column to nested namespaces. 810 * 811 * This is a unified algorithm that handles: 812 * 1. Direct UNION subqueries (CTE body is a UNION) 813 * 2. SELECT * FROM (UNION) patterns 814 * 3. SELECT * FROM (subquery) patterns 815 * 4. Deeply nested structures with JOINs 816 * 817 * The propagation is recursive - each namespace that receives the column 818 * will further propagate to its own nested namespaces. 819 * 820 * @param columnName The column name to propagate 821 * @param confidence Confidence score 822 * @param evidence Evidence string for debugging 823 */ 824 private void propagateToNestedNamespaces(String columnName, double confidence, String evidence) { 825 // Case 1: Direct UNION subquery (CTE body is a UNION) 826 if (unionNamespace != null) { 827 if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 828 System.out.println("[CTENamespace] Propagating '" + columnName + "' to direct unionNamespace in " + cteName); 829 } 830 unionNamespace.addInferredColumn(columnName, confidence, evidence + "_cte_union_propagate"); 831 return; 832 } 833 834 // Case 2: CTE has SELECT * from nested structures (subqueries, unions in FROM clause) 835 // Only propagate if the CTE's SELECT list contains a star column 836 if (!hasStarColumn()) { 837 if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 838 System.out.println("[CTENamespace] No star column in " + cteName + ", skipping FROM clause propagation"); 839 } 840 return; 841 } 842 843 // Get or create namespaces for FROM clause tables 844 List<INamespace> fromNamespaces = getOrCreateFromClauseNamespaces(); 845 if (fromNamespaces.isEmpty()) { 846 if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 847 System.out.println("[CTENamespace] No FROM clause namespaces with dynamic inference in " + cteName); 848 } 849 return; 850 } 851 852 // Propagate to each FROM clause namespace 853 for (INamespace ns : fromNamespaces) { 854 if (ns.supportsDynamicInference()) { 855 if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 856 System.out.println("[CTENamespace] Propagating '" + columnName + "' to FROM clause namespace " + 857 ns.getDisplayName() + " in " + cteName); 858 } 859 // The nested namespace's addInferredColumn will recursively propagate further 860 ns.addInferredColumn(columnName, confidence, evidence + "_cte_from_propagate"); 861 } 862 } 863 } 864 865 @Override 866 public Set<String> getInferredColumns() { 867 if (inferredColumnNames == null) { 868 return Collections.emptySet(); 869 } 870 return Collections.unmodifiableSet(inferredColumnNames); 871 } 872 873 @Override 874 public ColumnLevel hasColumn(String columnName) { 875 ensureValidated(); 876 877 // Check in explicit columns 878 if (columnSources != null) { 879 for (String existingCol : columnSources.keySet()) { 880 if (nameMatcher.matches(existingCol, columnName)) { 881 return ColumnLevel.EXISTS; 882 } 883 } 884 } 885 886 // Check in inferred columns 887 if (inferredColumns != null && inferredColumns.containsKey(columnName)) { 888 return ColumnLevel.EXISTS; 889 } 890 891 // If has star column, unknown columns MAYBE exist 892 if (hasStarColumn()) { 893 return ColumnLevel.MAYBE; 894 } 895 896 // If the CTE has explicit column definitions like "cte(c1, c2, c3)", then ONLY 897 // those columns exist - don't return MAYBE for other columns. 898 // This prevents ambiguous resolution when a CTE with explicit columns is joined 899 // with another table. 900 if (!explicitColumns.isEmpty()) { 901 return ColumnLevel.NOT_EXISTS; 902 } 903 904 // For CTEs without explicit columns AND without star columns, check if underlying 905 // tables might have the column. This handles cases like referencing columns from 906 // the CTE's base tables that aren't explicitly selected in the CTE's SELECT list. 907 if (selectStatement != null && selectStatement.tables != null) { 908 for (int i = 0; i < selectStatement.tables.size(); i++) { 909 TTable table = selectStatement.tables.getTable(i); 910 if (table != null && table.getTableType() == gudusoft.gsqlparser.ETableSource.objectname) { 911 // The CTE has a base table - column might exist there 912 return ColumnLevel.MAYBE; 913 } 914 } 915 } 916 917 return ColumnLevel.NOT_EXISTS; 918 } 919 920 @Override 921 public ColumnSource resolveColumn(String columnName) { 922 ensureValidated(); 923 924 // First check explicit columns 925 ColumnSource source = super.resolveColumn(columnName); 926 if (source != null) { 927 return source; 928 } 929 930 // Then check inferred columns 931 if (inferredColumns != null) { 932 for (Map.Entry<String, ColumnSource> entry : inferredColumns.entrySet()) { 933 if (nameMatcher.matches(entry.getKey(), columnName)) { 934 return entry.getValue(); 935 } 936 } 937 } 938 939 // If has star column, auto-infer this column 940 if (hasStarColumn()) { 941 boolean added = addInferredColumn(columnName, 0.8, "auto_inferred_from_reference"); 942 if (added && inferredColumns != null) { 943 return inferredColumns.get(columnName); 944 } 945 } 946 947 // For CTEs without star columns, check if underlying base tables might have the column. 948 // This handles references to columns that aren't explicitly selected in the CTE's SELECT list. 949 if (selectStatement != null && selectStatement.tables != null) { 950 for (int i = 0; i < selectStatement.tables.size(); i++) { 951 TTable table = selectStatement.tables.getTable(i); 952 if (table != null && table.getTableType() == gudusoft.gsqlparser.ETableSource.objectname) { 953 // Create an inferred column source that traces to the base table 954 boolean added = addInferredColumn(columnName, 0.6, "inferred_from_cte_base_table"); 955 if (added && inferredColumns != null) { 956 return inferredColumns.get(columnName); 957 } 958 break; 959 } 960 } 961 } 962 963 return null; 964 } 965 966 /** 967 * Get the UnionNamespace if this CTE's subquery is a UNION. 968 */ 969 public UnionNamespace getUnionNamespace() { 970 return unionNamespace; 971 } 972 973 /** 974 * Get or create namespaces for FROM clause tables that support dynamic inference. 975 * This handles cases like: WITH cte AS (SELECT * FROM (UNION) sub) 976 * where the CTE body is not directly a UNION but contains a subquery with UNION. 977 * 978 * The namespaces are lazily created and cached for reuse. 979 * 980 * @return List of namespaces that support dynamic inference (may be empty) 981 */ 982 private List<INamespace> getOrCreateFromClauseNamespaces() { 983 if (fromClauseNamespaces != null) { 984 return fromClauseNamespaces; 985 } 986 987 fromClauseNamespaces = new ArrayList<>(); 988 989 if (selectStatement == null || selectStatement.tables == null) { 990 return fromClauseNamespaces; 991 } 992 993 // Iterate through FROM clause tables and create namespaces for those that 994 // could have star columns (subqueries, unions, CTE references) 995 for (int i = 0; i < selectStatement.tables.size(); i++) { 996 TTable table = selectStatement.tables.getTable(i); 997 if (table == null) continue; 998 999 INamespace ns = createNamespaceForTable(table); 1000 if (ns != null && ns.supportsDynamicInference()) { 1001 fromClauseNamespaces.add(ns); 1002 } 1003 } 1004 1005 return fromClauseNamespaces; 1006 } 1007 1008 /** 1009 * Create an appropriate namespace for a table in the FROM clause. 1010 * Handles subqueries (including UNION), CTE references, and joins recursively. 1011 * 1012 * @param table The table from the FROM clause 1013 * @return INamespace for the table, or null if not applicable 1014 */ 1015 private INamespace createNamespaceForTable(TTable table) { 1016 if (table == null) return null; 1017 1018 // Handle subquery tables 1019 if (table.getSubquery() != null) { 1020 TSelectSqlStatement subquery = table.getSubquery(); 1021 String alias = table.getAliasName(); 1022 1023 // Check if subquery is a UNION/INTERSECT/EXCEPT 1024 if (subquery.isCombinedQuery()) { 1025 UnionNamespace unionNs = new UnionNamespace(subquery, alias, nameMatcher); 1026 return unionNs; 1027 } else { 1028 // Regular subquery - create SubqueryNamespace 1029 SubqueryNamespace subNs = new SubqueryNamespace(subquery, alias, nameMatcher); 1030 subNs.validate(); 1031 return subNs; 1032 } 1033 } 1034 1035 // Handle CTE references - these are handled by NamespaceEnhancer.propagateThroughCTEChains() 1036 // We don't create new CTENamespace here because we need the actual instances from scope tree 1037 1038 // Handle JOIN tables - recursively collect from join expressions 1039 if (table.getTableType() == gudusoft.gsqlparser.ETableSource.join) { 1040 return createNamespaceForJoin(table); 1041 } 1042 1043 return null; 1044 } 1045 1046 /** 1047 * Create namespaces for tables within a JOIN expression. 1048 * Returns a composite namespace that wraps all namespaces from the join. 1049 * 1050 * @param joinTable The JOIN table 1051 * @return INamespace that wraps join namespaces, or null 1052 */ 1053 private INamespace createNamespaceForJoin(TTable joinTable) { 1054 if (joinTable == null || joinTable.getJoinExpr() == null) { 1055 return null; 1056 } 1057 1058 gudusoft.gsqlparser.nodes.TJoinExpr joinExpr = joinTable.getJoinExpr(); 1059 1060 // Collect namespaces from both sides of the join 1061 List<INamespace> joinNamespaces = new ArrayList<>(); 1062 1063 // Left side 1064 TTable leftTable = joinExpr.getLeftTable(); 1065 if (leftTable != null) { 1066 INamespace leftNs = createNamespaceForTable(leftTable); 1067 if (leftNs != null && leftNs.supportsDynamicInference()) { 1068 joinNamespaces.add(leftNs); 1069 } 1070 } 1071 1072 // Right side 1073 TTable rightTable = joinExpr.getRightTable(); 1074 if (rightTable != null) { 1075 INamespace rightNs = createNamespaceForTable(rightTable); 1076 if (rightNs != null && rightNs.supportsDynamicInference()) { 1077 joinNamespaces.add(rightNs); 1078 } 1079 } 1080 1081 // If we found namespaces, add them to fromClauseNamespaces directly 1082 // (we don't create a composite namespace, just add the individual ones) 1083 if (!joinNamespaces.isEmpty()) { 1084 fromClauseNamespaces.addAll(joinNamespaces); 1085 } 1086 1087 return null; // Individual namespaces added directly to fromClauseNamespaces 1088 } 1089 1090 public List<String> getExplicitColumns() { 1091 return new ArrayList<>(explicitColumns); 1092 } 1093 1094 public boolean isRecursive() { 1095 return recursive; 1096 } 1097 1098 @Override 1099 public String toString() { 1100 return String.format("CTENamespace(%s, columns=%d, recursive=%s)", 1101 cteName, 1102 columnSources != null ? columnSources.size() : explicitColumns.size(), 1103 recursive 1104 ); 1105 } 1106}