001package gudusoft.gsqlparser.resolver2.namespace; 002 003import gudusoft.gsqlparser.nodes.TCTE; 004import gudusoft.gsqlparser.nodes.TObjectName; 005import gudusoft.gsqlparser.nodes.TResultColumn; 006import gudusoft.gsqlparser.nodes.TResultColumnList; 007import gudusoft.gsqlparser.nodes.TTable; 008import gudusoft.gsqlparser.resolver2.ColumnLevel; 009import gudusoft.gsqlparser.resolver2.matcher.INameMatcher; 010import gudusoft.gsqlparser.resolver2.model.ColumnSource; 011import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 012 013import java.util.ArrayList; 014import java.util.Collections; 015import java.util.HashSet; 016import java.util.LinkedHashMap; 017import java.util.List; 018import java.util.Map; 019import java.util.Set; 020 021/** 022 * Namespace representing a Common Table Expression (CTE). 023 * Similar to SubqueryNamespace but handles CTE-specific features: 024 * - Explicit column list: WITH cte(c1, c2) AS (SELECT ...) 025 * - Recursive CTEs 026 * - Multiple references within same query 027 * - UNION subqueries: columns are pushed through to all UNION branches 028 * 029 * Example: 030 * WITH my_cte(id, name) AS ( 031 * SELECT user_id, user_name FROM users 032 * ) 033 * SELECT id, name FROM my_cte; 034 */ 035public class CTENamespace extends AbstractNamespace { 036 037 private final TCTE cte; 038 private final String cteName; 039 private final TSelectSqlStatement selectStatement; 040 041 /** CTE column list (explicit column names) */ 042 private final List<String> explicitColumns; 043 044 /** Whether this CTE is recursive */ 045 private final boolean recursive; 046 047 /** UnionNamespace if this CTE's subquery is a UNION */ 048 private UnionNamespace unionNamespace; 049 050 /** Inferred columns from star push-down */ 051 private Map<String, ColumnSource> inferredColumns; 052 053 /** Track inferred column names */ 054 private Set<String> inferredColumnNames; 055 056 /** 057 * The TTable that references this CTE in a FROM clause. 058 * Used as fallback for getFinalTable() when there's no underlying physical table. 059 * For example: WITH cte AS (SELECT 1 AS col) SELECT col FROM cte 060 * The referencing TTable is the 'cte' in the FROM clause. 061 */ 062 private TTable referencingTable; 063 064 /** 065 * Namespaces for FROM clause tables that support dynamic inference. 066 * Used to propagate inferred columns through deeply nested structures. 067 * Lazily initialized when needed. 068 * 069 * Example: WITH cte AS (SELECT * FROM (SELECT * FROM t1 UNION ALL SELECT * FROM t2) sub) 070 * The 'sub' subquery namespace is stored here for propagation. 071 */ 072 private List<INamespace> fromClauseNamespaces; 073 074 public CTENamespace(TCTE cte, 075 String cteName, 076 TSelectSqlStatement selectStatement, 077 INameMatcher nameMatcher) { 078 super(cte, nameMatcher); 079 this.cte = cte; 080 this.cteName = cteName; 081 this.selectStatement = selectStatement; 082 this.explicitColumns = extractExplicitColumns(cte); 083 this.recursive = isRecursiveCTE(cte); 084 085 // If the CTE's subquery is a UNION, create a UnionNamespace to handle it 086 if (selectStatement != null && selectStatement.isCombinedQuery()) { 087 this.unionNamespace = new UnionNamespace(selectStatement, cteName + "_union", nameMatcher); 088 } 089 } 090 091 public CTENamespace(TCTE cte, String cteName, TSelectSqlStatement selectStatement) { 092 this(cte, cteName, selectStatement, null); 093 } 094 095 @Override 096 public String getDisplayName() { 097 return cteName; 098 } 099 100 /** 101 * Get the TTable that references this CTE in a FROM clause. 102 */ 103 public TTable getReferencingTable() { 104 return referencingTable; 105 } 106 107 /** 108 * {@inheritDoc} 109 * For CTENamespace, returns the TTable that references this CTE in the query. 110 * This is the immediate source table for columns resolved through this CTE. 111 */ 112 @Override 113 public TTable getSourceTable() { 114 return referencingTable; 115 } 116 117 /** 118 * Set the TTable that references this CTE in a FROM clause. 119 * Called by ScopeBuilder when a CTE is referenced. 120 */ 121 public void setReferencingTable(TTable table) { 122 this.referencingTable = table; 123 } 124 125 @Override 126 public TTable getFinalTable() { 127 // Trace through the CTE's subquery to find the underlying physical table 128 // This is similar to SubqueryNamespace.getFinalTable() but handles CTE chains 129 130 // If this CTE has a UNION subquery, delegate to UnionNamespace 131 if (unionNamespace != null) { 132 TTable unionTable = unionNamespace.getFinalTable(); 133 if (unionTable != null) { 134 return unionTable; 135 } 136 // Fallback to referencing table if UNION has no physical tables 137 return referencingTable; 138 } 139 140 // If no tables in the CTE's SELECT, return the referencing table 141 // This handles CTEs like: WITH cte AS (SELECT 1 AS col) 142 if (selectStatement == null || selectStatement.tables == null || selectStatement.tables.size() == 0) { 143 return referencingTable; 144 } 145 146 // Check for qualified star column (e.g., CTE_NAME.*) first 147 TTable qualifiedStarTable = findTableFromQualifiedStar(); 148 if (qualifiedStarTable != null) { 149 return qualifiedStarTable; 150 } 151 152 // For single-table CTEs, trace to the underlying table 153 TTable firstTable = selectStatement.tables.getTable(0); 154 if (firstTable == null) { 155 return null; 156 } 157 158 // If it's a physical table (not a CTE reference), return it 159 if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !firstTable.isCTEName()) { 160 return firstTable; 161 } 162 163 // If it's a CTE reference, trace through the CTE chain 164 if (firstTable.isCTEName() && firstTable.getCTE() != null) { 165 return traceTableThroughCTE(firstTable.getCTE()); 166 } 167 168 // If it's a subquery, trace through it 169 if (firstTable.getSubquery() != null) { 170 SubqueryNamespace nestedNs = new SubqueryNamespace( 171 firstTable.getSubquery(), 172 firstTable.getAliasName(), 173 nameMatcher 174 ); 175 nestedNs.validate(); 176 TTable subTable = nestedNs.getFinalTable(); 177 if (subTable != null) { 178 return subTable; 179 } 180 } 181 182 // If it's a join, get the first base table 183 if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.join) { 184 TTable joinTable = findFirstPhysicalTableFromJoin(firstTable); 185 if (joinTable != null) { 186 return joinTable; 187 } 188 } 189 190 // Fallback: return the referencing TTable (the CTE reference in FROM clause) 191 // This is used when the CTE doesn't have underlying physical tables, 192 // e.g., WITH cte AS (SELECT 1 AS col) - the columns are literals, not from tables 193 return referencingTable; 194 } 195 196 /** 197 * Find the table referenced by a qualified star column in this CTE's SELECT list. 198 * Example: SELECT other_cte.* FROM other_cte -> traces to other_cte's underlying table 199 */ 200 private TTable findTableFromQualifiedStar() { 201 if (selectStatement == null || selectStatement.getResultColumnList() == null) { 202 return null; 203 } 204 205 TResultColumnList selectList = selectStatement.getResultColumnList(); 206 for (int i = 0; i < selectList.size(); i++) { 207 TResultColumn resultCol = selectList.getResultColumn(i); 208 if (resultCol == null) continue; 209 210 String colStr = resultCol.toString().trim(); 211 // Check if it's a qualified star (contains . before *) 212 if (colStr.endsWith("*") && colStr.contains(".")) { 213 int dotIndex = colStr.lastIndexOf('.'); 214 if (dotIndex > 0) { 215 String tablePrefix = colStr.substring(0, dotIndex).trim(); 216 // Find the table with this alias or name 217 TTable matchingTable = findTableByAliasOrName(tablePrefix); 218 if (matchingTable != null) { 219 // If the matching table is a CTE reference, trace through it 220 if (matchingTable.isCTEName() && matchingTable.getCTE() != null) { 221 return traceTableThroughCTE(matchingTable.getCTE()); 222 } 223 // If it's a subquery, trace through it 224 if (matchingTable.getSubquery() != null) { 225 SubqueryNamespace nestedNs = new SubqueryNamespace( 226 matchingTable.getSubquery(), 227 matchingTable.getAliasName(), 228 nameMatcher 229 ); 230 nestedNs.validate(); 231 return nestedNs.getFinalTable(); 232 } 233 // If it's a physical table, return it 234 if (matchingTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !matchingTable.isCTEName()) { 235 return matchingTable; 236 } 237 } 238 } 239 } 240 } 241 return null; 242 } 243 244 /** 245 * Find a table in the FROM clause by alias or name. 246 */ 247 private TTable findTableByAliasOrName(String nameOrAlias) { 248 if (selectStatement == null || selectStatement.tables == null) { 249 return null; 250 } 251 252 for (int i = 0; i < selectStatement.tables.size(); i++) { 253 TTable table = selectStatement.tables.getTable(i); 254 if (table == null) continue; 255 256 // Check alias 257 String alias = table.getAliasName(); 258 if (alias != null && nameMatcher.matches(alias, nameOrAlias)) { 259 return table; 260 } 261 262 // Check table name 263 if (table.getTableName() != null && nameMatcher.matches(table.getTableName().toString(), nameOrAlias)) { 264 return table; 265 } 266 } 267 return null; 268 } 269 270 /** 271 * Trace through a CTE to find its underlying physical table. 272 * This handles CTE chains like: CTE1 -> CTE2 -> CTE3 -> physical_table 273 */ 274 private TTable traceTableThroughCTE(TCTE cteNode) { 275 return traceTableThroughCTE(cteNode, new HashSet<TCTE>()); 276 } 277 278 private TTable traceTableThroughCTE(TCTE cteNode, java.util.Set<TCTE> visited) { 279 if (cteNode == null || cteNode.getSubquery() == null) { 280 return null; 281 } 282 283 // Detect circular CTE references 284 if (!visited.add(cteNode)) { 285 return null; 286 } 287 288 TSelectSqlStatement cteSubquery = cteNode.getSubquery(); 289 290 // Handle UNION in the CTE 291 if (cteSubquery.isCombinedQuery()) { 292 // For UNION, trace the left branch 293 TSelectSqlStatement leftStmt = cteSubquery.getLeftStmt(); 294 if (leftStmt != null && leftStmt.tables != null && leftStmt.tables.size() > 0) { 295 cteSubquery = leftStmt; 296 } 297 } 298 299 if (cteSubquery.tables == null || cteSubquery.tables.size() == 0) { 300 return null; 301 } 302 303 TTable firstTable = cteSubquery.tables.getTable(0); 304 if (firstTable == null) { 305 return null; 306 } 307 308 // If it's a physical table (not CTE), we found it 309 if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !firstTable.isCTEName()) { 310 return firstTable; 311 } 312 313 // If it's another CTE reference, continue tracing 314 if (firstTable.isCTEName() && firstTable.getCTE() != null) { 315 return traceTableThroughCTE(firstTable.getCTE(), visited); 316 } 317 318 // If it's a subquery, trace through it 319 if (firstTable.getSubquery() != null) { 320 SubqueryNamespace nestedNs = new SubqueryNamespace( 321 firstTable.getSubquery(), 322 firstTable.getAliasName(), 323 nameMatcher 324 ); 325 nestedNs.validate(); 326 return nestedNs.getFinalTable(); 327 } 328 329 // If it's a join, get the first base table 330 if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.join) { 331 return findFirstPhysicalTableFromJoin(firstTable); 332 } 333 334 return null; 335 } 336 337 /** 338 * Find the first physical table from a JOIN expression. 339 */ 340 private TTable findFirstPhysicalTableFromJoin(TTable joinTable) { 341 if (joinTable == null || joinTable.getJoinExpr() == null) { 342 return null; 343 } 344 345 gudusoft.gsqlparser.nodes.TJoinExpr joinExpr = joinTable.getJoinExpr(); 346 347 // Check left side first 348 TTable leftTable = joinExpr.getLeftTable(); 349 if (leftTable != null) { 350 if (leftTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !leftTable.isCTEName()) { 351 return leftTable; 352 } 353 if (leftTable.isCTEName() && leftTable.getCTE() != null) { 354 TTable traced = traceTableThroughCTE(leftTable.getCTE()); 355 if (traced != null) return traced; 356 } 357 if (leftTable.getSubquery() != null) { 358 SubqueryNamespace nestedNs = new SubqueryNamespace( 359 leftTable.getSubquery(), 360 leftTable.getAliasName(), 361 nameMatcher 362 ); 363 nestedNs.validate(); 364 return nestedNs.getFinalTable(); 365 } 366 if (leftTable.getTableType() == gudusoft.gsqlparser.ETableSource.join) { 367 return findFirstPhysicalTableFromJoin(leftTable); 368 } 369 } 370 371 // Check right side 372 TTable rightTable = joinExpr.getRightTable(); 373 if (rightTable != null) { 374 if (rightTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !rightTable.isCTEName()) { 375 return rightTable; 376 } 377 if (rightTable.isCTEName() && rightTable.getCTE() != null) { 378 return traceTableThroughCTE(rightTable.getCTE()); 379 } 380 } 381 382 return null; 383 } 384 385 @Override 386 public List<TTable> getAllFinalTables() { 387 // If this CTE has a UNION subquery, delegate to the UnionNamespace 388 if (unionNamespace != null) { 389 return unionNamespace.getAllFinalTables(); 390 } 391 392 // Check if this CTE references another CTE (which might be a UNION) 393 if (selectStatement != null && selectStatement.tables != null && selectStatement.tables.size() > 0) { 394 TTable firstTable = selectStatement.tables.getTable(0); 395 if (firstTable != null && firstTable.isCTEName() && firstTable.getCTE() != null) { 396 TCTE referencedCTE = firstTable.getCTE(); 397 if (referencedCTE.getSubquery() != null) { 398 // Create a namespace for the referenced CTE to get its tables 399 CTENamespace referencedNs = new CTENamespace( 400 referencedCTE, 401 referencedCTE.getTableName() != null ? referencedCTE.getTableName().toString() : "cte", 402 referencedCTE.getSubquery(), 403 nameMatcher 404 ); 405 referencedNs.validate(); 406 // This will trace through the CTE chain to get all tables 407 // including from UNION branches 408 return referencedNs.getAllFinalTables(); 409 } 410 } 411 } 412 413 // For non-UNION, non-CTE-reference CTEs, return the single final table 414 TTable finalTable = getFinalTable(); 415 if (finalTable != null) { 416 return Collections.singletonList(finalTable); 417 } 418 419 return Collections.emptyList(); 420 } 421 422 @Override 423 protected void doValidate() { 424 columnSources = new LinkedHashMap<>(); 425 426 if (selectStatement == null || selectStatement.getResultColumnList() == null) { 427 return; 428 } 429 430 TResultColumnList selectList = selectStatement.getResultColumnList(); 431 432 // If CTE has explicit column list, use it 433 if (!explicitColumns.isEmpty()) { 434 validateWithExplicitColumns(selectList); 435 } else { 436 // No explicit columns, derive from SELECT list 437 validateWithImplicitColumns(selectList); 438 } 439 } 440 441 /** 442 * Validate CTE with explicit column list. 443 * Example: WITH cte(c1, c2, c3) AS (SELECT a, b, c FROM t) 444 * 445 * Two cases are handled: 446 * 447 * 1. **Position-based (Snowflake pattern)**: CTE explicit column list + SELECT * 448 * Example: WITH cte(c1, c2, c3) AS (SELECT * FROM Employees) 449 * - c1/c2/c3 are positional aliases for star expansion 450 * - Without metadata: c1 -> Employees.*, c2 -> Employees.*, c3 -> Employees.* 451 * - With metadata: c1 -> Employees.<col_1>, c2 -> Employees.<col_2>, etc. 452 * 453 * 2. **Direct mapping**: CTE explicit column list + named columns 454 * Example: WITH cte(c1, c2) AS (SELECT id, name FROM t) 455 * - c1 -> t.id, c2 -> t.name (1:1 positional mapping) 456 * 457 * @see <a href="star_column_pushdown.md#cte-explicit-column-list--select--snowflake-case"> 458 * Documentation: CTE Explicit Column List + SELECT *</a> 459 */ 460 private void validateWithExplicitColumns(TResultColumnList selectList) { 461 // Check if the SELECT list contains only star column(s) - the position-based pattern 462 StarColumnInfo starInfo = analyzeStarColumns(selectList); 463 464 if (starInfo.isSingleStar()) { 465 // Position-based case: CTE(c1,c2,c3) AS (SELECT * FROM t) 466 // The column names c1/c2/c3 are positional aliases, not real column names 467 handleExplicitColumnsWithStar(starInfo.getStarColumn(), starInfo.getStarQualifier()); 468 } else { 469 // Direct mapping case: CTE(c1,c2) AS (SELECT id, name FROM t) 470 // Each explicit column maps to corresponding SELECT list item by position 471 handleExplicitColumnsWithDirectMapping(selectList); 472 } 473 } 474 475 /** 476 * Handle CTE explicit columns when SELECT list is a star. 477 * This is the position-based (Snowflake) pattern. 478 * 479 * @param starColumn the star column (* or table.*) 480 * @param starQualifier the table qualifier if qualified star (e.g., "src" for "src.*"), or null 481 */ 482 private void handleExplicitColumnsWithStar(TResultColumn starColumn, String starQualifier) { 483 // Try ordinal mapping if metadata is available 484 List<String> ordinalColumns = tryOrdinalMapping(starQualifier); 485 486 if (ordinalColumns != null && ordinalColumns.size() >= explicitColumns.size()) { 487 // Metadata available - use ordinal mapping: c1 -> Employees.<col_1> 488 for (int i = 0; i < explicitColumns.size(); i++) { 489 String cteColName = explicitColumns.get(i); 490 String baseColName = ordinalColumns.get(i); 491 492 ColumnSource source = new ColumnSource( 493 this, 494 cteColName, 495 starColumn, // Reference to star column 496 1.0, // High confidence - ordinal mapping from metadata 497 "cte_explicit_column_ordinal:" + baseColName 498 ); 499 columnSources.put(cteColName, source); 500 } 501 } else { 502 // No metadata - fallback to star reference: c1 -> Employees.* 503 for (String colName : explicitColumns) { 504 ColumnSource source = new ColumnSource( 505 this, 506 colName, 507 starColumn, // Reference to star column 508 0.8, // Lower confidence - ordinal mapping unknown 509 "cte_explicit_column_via_star" 510 ); 511 columnSources.put(colName, source); 512 } 513 } 514 } 515 516 /** 517 * Handle CTE explicit columns with direct positional mapping to SELECT list. 518 * 519 * @param selectList the SELECT list to map from 520 */ 521 private void handleExplicitColumnsWithDirectMapping(TResultColumnList selectList) { 522 int columnCount = Math.min(explicitColumns.size(), selectList.size()); 523 524 for (int i = 0; i < columnCount; i++) { 525 String colName = explicitColumns.get(i); 526 TResultColumn resultCol = selectList.getResultColumn(i); 527 528 ColumnSource source = new ColumnSource( 529 this, 530 colName, 531 resultCol, 532 1.0, // Definite - direct positional mapping 533 "cte_explicit_column" 534 ); 535 536 columnSources.put(colName, source); 537 } 538 } 539 540 /** 541 * Try to get ordered column names from metadata for ordinal mapping. 542 * 543 * @param starQualifier the table qualifier (e.g., "src"), or null for unqualified star 544 * @return ordered list of column names from metadata, or null if not available 545 */ 546 private List<String> tryOrdinalMapping(String starQualifier) { 547 // TODO: When metadata (TSQLEnv/DDL) is available, return ordered column list 548 // For now, return null to use the fallback (star reference) 549 // 550 // Future implementation: 551 // 1. Find the source table namespace by starQualifier 552 // 2. Get its column sources (which use LinkedHashMap for insertion order) 553 // 3. Return the column names in order 554 return null; 555 } 556 557 /** 558 * Analyze star columns in the SELECT list. 559 * Determines if the SELECT is a single star column pattern. 560 */ 561 private StarColumnInfo analyzeStarColumns(TResultColumnList selectList) { 562 if (selectList == null || selectList.size() == 0) { 563 return new StarColumnInfo(); 564 } 565 566 // Check for single star column pattern 567 if (selectList.size() == 1) { 568 TResultColumn rc = selectList.getResultColumn(0); 569 if (isStarColumn(rc)) { 570 String qualifier = getStarQualifier(rc); 571 return new StarColumnInfo(rc, qualifier); 572 } 573 } 574 575 return new StarColumnInfo(); 576 } 577 578 /** 579 * Check if a result column is a star column (* or table.*) 580 */ 581 private boolean isStarColumn(TResultColumn rc) { 582 if (rc == null) { 583 return false; 584 } 585 String str = rc.toString(); 586 return str != null && (str.equals("*") || str.endsWith(".*")); 587 } 588 589 /** 590 * Get the qualifier from a qualified star (src.* returns "src") 591 */ 592 private String getStarQualifier(TResultColumn rc) { 593 if (rc == null) { 594 return null; 595 } 596 String str = rc.toString(); 597 if (str != null && str.endsWith(".*") && str.length() > 2) { 598 return str.substring(0, str.length() - 2); 599 } 600 return null; 601 } 602 603 /** 604 * Helper class to hold star column analysis results. 605 */ 606 private static class StarColumnInfo { 607 private final TResultColumn starColumn; 608 private final String starQualifier; 609 610 StarColumnInfo() { 611 this.starColumn = null; 612 this.starQualifier = null; 613 } 614 615 StarColumnInfo(TResultColumn starColumn, String starQualifier) { 616 this.starColumn = starColumn; 617 this.starQualifier = starQualifier; 618 } 619 620 boolean isSingleStar() { 621 return starColumn != null; 622 } 623 624 TResultColumn getStarColumn() { 625 return starColumn; 626 } 627 628 String getStarQualifier() { 629 return starQualifier; 630 } 631 } 632 633 /** 634 * Validate CTE without explicit column list. 635 * Example: WITH cte AS (SELECT id, name FROM users) 636 */ 637 private void validateWithImplicitColumns(TResultColumnList selectList) { 638 for (int i = 0; i < selectList.size(); i++) { 639 TResultColumn resultCol = selectList.getResultColumn(i); 640 641 // Determine column name 642 String colName = getColumnName(resultCol); 643 if (colName == null) { 644 colName = "col_" + (i + 1); 645 } 646 647 // Create column source 648 ColumnSource source = new ColumnSource( 649 this, 650 colName, 651 resultCol, 652 1.0, // Definite - from SELECT list 653 "cte_implicit_column" 654 ); 655 656 columnSources.put(colName, source); 657 } 658 } 659 660 /** 661 * Extract column name from TResultColumn 662 */ 663 private String getColumnName(TResultColumn resultCol) { 664 // Check for alias 665 if (resultCol.getAliasClause() != null && 666 resultCol.getAliasClause().getAliasName() != null) { 667 return resultCol.getAliasClause().getAliasName().toString(); 668 } 669 670 // Check for simple column reference 671 if (resultCol.getExpr() != null) { 672 gudusoft.gsqlparser.nodes.TExpression expr = resultCol.getExpr(); 673 if (expr.getExpressionType() == gudusoft.gsqlparser.EExpressionType.simple_object_name_t) { 674 TObjectName objName = expr.getObjectOperand(); 675 if (objName != null) { 676 return objName.getColumnNameOnly(); 677 } 678 } 679 } 680 681 return null; 682 } 683 684 /** 685 * Extract explicit column list from CTE 686 */ 687 private List<String> extractExplicitColumns(TCTE cte) { 688 List<String> columns = new ArrayList<>(); 689 690 if (cte != null && cte.getColumnList() != null) { 691 for (int i = 0; i < cte.getColumnList().size(); i++) { 692 TObjectName colName = cte.getColumnList().getObjectName(i); 693 if (colName != null) { 694 columns.add(colName.toString()); 695 } 696 } 697 } 698 699 return columns; 700 } 701 702 /** 703 * Check if this is a recursive CTE 704 */ 705 private boolean isRecursiveCTE(TCTE cte) { 706 if (cte == null) { 707 return false; 708 } 709 return cte.isRecursive(); 710 } 711 712 public TCTE getCTE() { 713 return cte; 714 } 715 716 @Override 717 public TSelectSqlStatement getSelectStatement() { 718 return selectStatement; 719 } 720 721 @Override 722 public boolean hasStarColumn() { 723 // If this CTE has a UNION subquery, delegate to the UnionNamespace 724 if (unionNamespace != null) { 725 return unionNamespace.hasStarColumn(); 726 } 727 728 if (selectStatement == null || selectStatement.getResultColumnList() == null) { 729 return false; 730 } 731 732 TResultColumnList selectList = selectStatement.getResultColumnList(); 733 for (int i = 0; i < selectList.size(); i++) { 734 TResultColumn resultCol = selectList.getResultColumn(i); 735 if (resultCol != null && resultCol.toString().endsWith("*")) { 736 return true; 737 } 738 } 739 return false; 740 } 741 742 @Override 743 public boolean supportsDynamicInference() { 744 return hasStarColumn(); 745 } 746 747 /** 748 * Slice S4 (plan §5.5): a CTE's derived schema is authoritative once the 749 * CTE has validated and produced at least one named column source. CTEs 750 * with explicit column lists are always FOUND once validated; CTEs that 751 * simply propagate {@code SELECT *} produce derived columns from the 752 * underlying namespace and only become FOUND when those columns are 753 * known. Recursive CTEs and unresolved cyclic references stay 754 * METADATA_UNAVAILABLE — S9 will refine this contract. 755 */ 756 @Override 757 public MetadataState getMetadataState() { 758 ensureValidated(); 759 if (columnSources != null && !columnSources.isEmpty()) { 760 return MetadataState.FOUND; 761 } 762 return MetadataState.METADATA_UNAVAILABLE; 763 } 764 765 @Override 766 public boolean addInferredColumn(String columnName, double confidence, String evidence) { 767 if (columnName == null || columnName.isEmpty()) { 768 return false; 769 } 770 771 // Initialize maps if needed 772 if (inferredColumns == null) { 773 inferredColumns = new LinkedHashMap<>(); 774 } 775 if (inferredColumnNames == null) { 776 inferredColumnNames = new HashSet<>(); 777 } 778 779 // Slice S1: dedupe via matcher-aware helper so per-vendor identifier 780 // rules govern collision detection. Without this, BigQuery / MySQL 781 // (case-insensitive columns) accept "MyCol" and "MYCOL" as two entries 782 // and downstream lookups become non-deterministic. Codex round 2: 783 // the storage key is the raw (exposedName) form so two matcher-distinct 784 // identifiers that happen to normalize equally (e.g. Postgres quoted 785 // "mycol" vs unquoted MYCOL) keep separate entries. 786 if (containsColumnByMatcher(columnSources, columnName)) { 787 return false; 788 } 789 if (containsColumnByMatcher(inferredColumns, columnName)) { 790 return false; 791 } 792 793 // Collect candidate tables - get ALL final tables from the CTE chain 794 // This handles both UNION CTEs and CTEs that reference other CTEs 795 java.util.List<TTable> candidateTables = new java.util.ArrayList<>(); 796 797 // Get all final tables from this CTE's namespace (handles UNION and CTE chains) 798 java.util.List<TTable> allTables = this.getAllFinalTables(); 799 for (TTable table : allTables) { 800 if (table != null && !candidateTables.contains(table)) { 801 candidateTables.add(table); 802 } 803 } 804 805 // Create inferred column source WITH candidate tables if applicable 806 ColumnSource source = new ColumnSource( 807 this, 808 columnName, 809 null, 810 confidence, 811 evidence, 812 null, // overrideTable 813 (candidateTables != null && !candidateTables.isEmpty()) ? candidateTables : null 814 ); 815 816 inferredColumns.put(columnName, source); 817 inferredColumnNames.add(columnName); 818 819 // Propagate to nested namespaces if this CTE has SELECT * from subqueries/unions 820 propagateToNestedNamespaces(columnName, confidence, evidence); 821 822 // NOTE: Propagation to referenced CTEs (CTE chains like cte2 -> cte1) is handled 823 // by NamespaceEnhancer.propagateThroughCTEChains() which has access to the actual 824 // namespace instances from the scope tree. We don't do it here because creating 825 // new CTENamespace instances would not affect the actual instances used for resolution. 826 827 return true; 828 } 829 830 /** 831 * Propagate an inferred column to nested namespaces. 832 * 833 * This is a unified algorithm that handles: 834 * 1. Direct UNION subqueries (CTE body is a UNION) 835 * 2. SELECT * FROM (UNION) patterns 836 * 3. SELECT * FROM (subquery) patterns 837 * 4. Deeply nested structures with JOINs 838 * 839 * The propagation is recursive - each namespace that receives the column 840 * will further propagate to its own nested namespaces. 841 * 842 * @param columnName The column name to propagate 843 * @param confidence Confidence score 844 * @param evidence Evidence string for debugging 845 */ 846 private void propagateToNestedNamespaces(String columnName, double confidence, String evidence) { 847 // Case 1: Direct UNION subquery (CTE body is a UNION) 848 if (unionNamespace != null) { 849 if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 850 System.out.println("[CTENamespace] Propagating '" + columnName + "' to direct unionNamespace in " + cteName); 851 } 852 unionNamespace.addInferredColumn(columnName, confidence, evidence + "_cte_union_propagate"); 853 return; 854 } 855 856 // Case 2: CTE has SELECT * from nested structures (subqueries, unions in FROM clause) 857 // Only propagate if the CTE's SELECT list contains a star column 858 if (!hasStarColumn()) { 859 if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 860 System.out.println("[CTENamespace] No star column in " + cteName + ", skipping FROM clause propagation"); 861 } 862 return; 863 } 864 865 // Get or create namespaces for FROM clause tables 866 List<INamespace> fromNamespaces = getOrCreateFromClauseNamespaces(); 867 if (fromNamespaces.isEmpty()) { 868 if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 869 System.out.println("[CTENamespace] No FROM clause namespaces with dynamic inference in " + cteName); 870 } 871 return; 872 } 873 874 // Propagate to each FROM clause namespace 875 for (INamespace ns : fromNamespaces) { 876 if (ns.supportsDynamicInference()) { 877 if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) { 878 System.out.println("[CTENamespace] Propagating '" + columnName + "' to FROM clause namespace " + 879 ns.getDisplayName() + " in " + cteName); 880 } 881 // The nested namespace's addInferredColumn will recursively propagate further 882 ns.addInferredColumn(columnName, confidence, evidence + "_cte_from_propagate"); 883 } 884 } 885 } 886 887 @Override 888 public Set<String> getInferredColumns() { 889 if (inferredColumnNames == null) { 890 return Collections.emptySet(); 891 } 892 return Collections.unmodifiableSet(inferredColumnNames); 893 } 894 895 /** 896 * Binding-diagnostic view of the CTE output schema. 897 * 898 * <p>This deliberately differs from {@link #hasColumn(String)}, which has 899 * legacy compatibility fallbacks that may infer hidden base-table columns 900 * for lineage. Binding diagnostics need the SQL-visible CTE projection: 901 * explicit CTE column names, named projection columns, and star/pushdown 902 * inferred output columns are visible; hidden base-table inferences are 903 * not.</p> 904 */ 905 public ColumnLevel hasAuthoritativeOutputColumn(String columnName) { 906 ensureValidated(); 907 908 if (columnName == null || columnName.isEmpty()) { 909 return ColumnLevel.MAYBE; 910 } 911 912 if (containsColumnByMatcher(columnSources, columnName)) { 913 return ColumnLevel.EXISTS; 914 } 915 916 if (!explicitColumns.isEmpty()) { 917 return ColumnLevel.NOT_EXISTS; 918 } 919 920 if (containsAuthoritativeInferredColumn(columnName)) { 921 return ColumnLevel.EXISTS; 922 } 923 924 if (hasStarColumn()) { 925 return ColumnLevel.MAYBE; 926 } 927 928 if (columnSources != null && !columnSources.isEmpty()) { 929 return ColumnLevel.NOT_EXISTS; 930 } 931 932 return ColumnLevel.MAYBE; 933 } 934 935 /** 936 * Diagnostic-only sibling of the matcher-aware contains check that 937 * additionally filters out the legacy {@code inferred_from_cte_base_table} 938 * entries (which represent hidden base-table columns, not the CTE's 939 * SQL-visible projection). Matches must still respect per-vendor identifier 940 * rules so quoted Oracle / Postgres identifiers don't get folded. 941 */ 942 private boolean containsAuthoritativeInferredColumn(String columnName) { 943 if (inferredColumns == null || inferredColumns.isEmpty()) { 944 return false; 945 } 946 for (ColumnSource candidate : inferredColumns.values()) { 947 // Codex round 1: compare against the exposedName (original case, 948 // quotes preserved) instead of the normalized key, so quoted 949 // vs unquoted distinctions are honored on Oracle / Postgres / 950 // Snowflake / Hive / Teradata. 951 String exposed = candidate != null ? candidate.getExposedName() : null; 952 if (exposed == null || !nameMatcher.matches(exposed, columnName)) { 953 continue; 954 } 955 String evidence = candidate.getEvidence(); 956 if ("inferred_from_cte_base_table".equals(evidence)) { 957 continue; 958 } 959 return true; 960 } 961 return false; 962 } 963 964 @Override 965 public ColumnLevel hasColumn(String columnName) { 966 ensureValidated(); 967 968 // Check in explicit columns 969 if (containsColumnByMatcher(columnSources, columnName)) { 970 return ColumnLevel.EXISTS; 971 } 972 973 // Check in inferred columns. The map is raw-keyed (= ColumnSource. 974 // exposedName); the matcher-aware helper applies per-dialect rules 975 // including SQL Server COLLATION_BASED. Round 2 reverted from 976 // normalized keys because two matcher-distinct identifiers can 977 // normalize to the same key (Postgres "mycol" vs MYCOL). 978 if (containsColumnByMatcher(inferredColumns, columnName)) { 979 return ColumnLevel.EXISTS; 980 } 981 982 // If has star column, unknown columns MAYBE exist 983 if (hasStarColumn()) { 984 return ColumnLevel.MAYBE; 985 } 986 987 // If the CTE has explicit column definitions like "cte(c1, c2, c3)", then ONLY 988 // those columns exist - don't return MAYBE for other columns. 989 // This prevents ambiguous resolution when a CTE with explicit columns is joined 990 // with another table. 991 if (!explicitColumns.isEmpty()) { 992 return ColumnLevel.NOT_EXISTS; 993 } 994 995 // For CTEs without explicit columns AND without star columns, check if underlying 996 // tables might have the column. This handles cases like referencing columns from 997 // the CTE's base tables that aren't explicitly selected in the CTE's SELECT list. 998 if (selectStatement != null && selectStatement.tables != null) { 999 for (int i = 0; i < selectStatement.tables.size(); i++) { 1000 TTable table = selectStatement.tables.getTable(i); 1001 if (table != null && table.getTableType() == gudusoft.gsqlparser.ETableSource.objectname) { 1002 // The CTE has a base table - column might exist there 1003 return ColumnLevel.MAYBE; 1004 } 1005 } 1006 } 1007 1008 return ColumnLevel.NOT_EXISTS; 1009 } 1010 1011 @Override 1012 public ColumnSource resolveColumn(String columnName) { 1013 ensureValidated(); 1014 1015 // First check explicit columns 1016 ColumnSource source = super.resolveColumn(columnName); 1017 if (source != null) { 1018 return source; 1019 } 1020 1021 // Then check inferred columns. Slice S1 + codex round 2: the map is 1022 // raw-keyed (= ColumnSource.exposedName), so the exact-match probe 1023 // is O(1) for the same identifier queried again. For case-only- 1024 // different references, the matcher loop walks values via 1025 // getExposedName() so quote state is preserved on quoted-sensitive 1026 // dialects (Oracle / Postgres / Snowflake / Hive / Teradata). 1027 if (inferredColumns != null) { 1028 ColumnSource exact = inferredColumns.get(columnName); 1029 if (exact != null) { 1030 return exact; 1031 } 1032 for (ColumnSource entry : inferredColumns.values()) { 1033 String exposed = entry != null ? entry.getExposedName() : null; 1034 if (exposed != null && nameMatcher.matches(exposed, columnName)) { 1035 return entry; 1036 } 1037 } 1038 } 1039 1040 // If has star column, auto-infer this column 1041 if (hasStarColumn()) { 1042 boolean added = addInferredColumn(columnName, 0.8, "auto_inferred_from_reference"); 1043 if (added && inferredColumns != null) { 1044 return inferredColumns.get(columnName); 1045 } 1046 } 1047 1048 // For CTEs without star columns, check if underlying base tables might have the column. 1049 // This handles references to columns that aren't explicitly selected in the CTE's SELECT list. 1050 if (selectStatement != null && selectStatement.tables != null) { 1051 for (int i = 0; i < selectStatement.tables.size(); i++) { 1052 TTable table = selectStatement.tables.getTable(i); 1053 if (table != null && table.getTableType() == gudusoft.gsqlparser.ETableSource.objectname) { 1054 // Create an inferred column source that traces to the base table 1055 boolean added = addInferredColumn(columnName, 0.6, "inferred_from_cte_base_table"); 1056 if (added && inferredColumns != null) { 1057 return inferredColumns.get(columnName); 1058 } 1059 break; 1060 } 1061 } 1062 } 1063 1064 return null; 1065 } 1066 1067 /** 1068 * Get the UnionNamespace if this CTE's subquery is a UNION. 1069 */ 1070 public UnionNamespace getUnionNamespace() { 1071 return unionNamespace; 1072 } 1073 1074 /** 1075 * Get or create namespaces for FROM clause tables that support dynamic inference. 1076 * This handles cases like: WITH cte AS (SELECT * FROM (UNION) sub) 1077 * where the CTE body is not directly a UNION but contains a subquery with UNION. 1078 * 1079 * The namespaces are lazily created and cached for reuse. 1080 * 1081 * @return List of namespaces that support dynamic inference (may be empty) 1082 */ 1083 private List<INamespace> getOrCreateFromClauseNamespaces() { 1084 if (fromClauseNamespaces != null) { 1085 return fromClauseNamespaces; 1086 } 1087 1088 fromClauseNamespaces = new ArrayList<>(); 1089 1090 if (selectStatement == null || selectStatement.tables == null) { 1091 return fromClauseNamespaces; 1092 } 1093 1094 // Iterate through FROM clause tables and create namespaces for those that 1095 // could have star columns (subqueries, unions, CTE references) 1096 for (int i = 0; i < selectStatement.tables.size(); i++) { 1097 TTable table = selectStatement.tables.getTable(i); 1098 if (table == null) continue; 1099 1100 INamespace ns = createNamespaceForTable(table); 1101 if (ns != null && ns.supportsDynamicInference()) { 1102 fromClauseNamespaces.add(ns); 1103 } 1104 } 1105 1106 return fromClauseNamespaces; 1107 } 1108 1109 /** 1110 * Create an appropriate namespace for a table in the FROM clause. 1111 * Handles subqueries (including UNION), CTE references, and joins recursively. 1112 * 1113 * @param table The table from the FROM clause 1114 * @return INamespace for the table, or null if not applicable 1115 */ 1116 private INamespace createNamespaceForTable(TTable table) { 1117 if (table == null) return null; 1118 1119 // Handle subquery tables 1120 if (table.getSubquery() != null) { 1121 TSelectSqlStatement subquery = table.getSubquery(); 1122 String alias = table.getAliasName(); 1123 1124 // Check if subquery is a UNION/INTERSECT/EXCEPT 1125 if (subquery.isCombinedQuery()) { 1126 UnionNamespace unionNs = new UnionNamespace(subquery, alias, nameMatcher); 1127 return unionNs; 1128 } else { 1129 // Regular subquery - create SubqueryNamespace 1130 SubqueryNamespace subNs = new SubqueryNamespace(subquery, alias, nameMatcher); 1131 subNs.validate(); 1132 return subNs; 1133 } 1134 } 1135 1136 // Handle CTE references - these are handled by NamespaceEnhancer.propagateThroughCTEChains() 1137 // We don't create new CTENamespace here because we need the actual instances from scope tree 1138 1139 // Handle JOIN tables - recursively collect from join expressions 1140 if (table.getTableType() == gudusoft.gsqlparser.ETableSource.join) { 1141 return createNamespaceForJoin(table); 1142 } 1143 1144 return null; 1145 } 1146 1147 /** 1148 * Create namespaces for tables within a JOIN expression. 1149 * Returns a composite namespace that wraps all namespaces from the join. 1150 * 1151 * @param joinTable The JOIN table 1152 * @return INamespace that wraps join namespaces, or null 1153 */ 1154 private INamespace createNamespaceForJoin(TTable joinTable) { 1155 if (joinTable == null || joinTable.getJoinExpr() == null) { 1156 return null; 1157 } 1158 1159 gudusoft.gsqlparser.nodes.TJoinExpr joinExpr = joinTable.getJoinExpr(); 1160 1161 // Collect namespaces from both sides of the join 1162 List<INamespace> joinNamespaces = new ArrayList<>(); 1163 1164 // Left side 1165 TTable leftTable = joinExpr.getLeftTable(); 1166 if (leftTable != null) { 1167 INamespace leftNs = createNamespaceForTable(leftTable); 1168 if (leftNs != null && leftNs.supportsDynamicInference()) { 1169 joinNamespaces.add(leftNs); 1170 } 1171 } 1172 1173 // Right side 1174 TTable rightTable = joinExpr.getRightTable(); 1175 if (rightTable != null) { 1176 INamespace rightNs = createNamespaceForTable(rightTable); 1177 if (rightNs != null && rightNs.supportsDynamicInference()) { 1178 joinNamespaces.add(rightNs); 1179 } 1180 } 1181 1182 // If we found namespaces, add them to fromClauseNamespaces directly 1183 // (we don't create a composite namespace, just add the individual ones) 1184 if (!joinNamespaces.isEmpty()) { 1185 fromClauseNamespaces.addAll(joinNamespaces); 1186 } 1187 1188 return null; // Individual namespaces added directly to fromClauseNamespaces 1189 } 1190 1191 public List<String> getExplicitColumns() { 1192 return new ArrayList<>(explicitColumns); 1193 } 1194 1195 public boolean isRecursive() { 1196 return recursive; 1197 } 1198 1199 @Override 1200 public String toString() { 1201 return String.format("CTENamespace(%s, columns=%d, recursive=%s)", 1202 cteName, 1203 columnSources != null ? columnSources.size() : explicitColumns.size(), 1204 recursive 1205 ); 1206 } 1207}