001package gudusoft.gsqlparser.resolver2.namespace;
002
003import gudusoft.gsqlparser.nodes.TCTE;
004import gudusoft.gsqlparser.nodes.TObjectName;
005import gudusoft.gsqlparser.nodes.TResultColumn;
006import gudusoft.gsqlparser.nodes.TResultColumnList;
007import gudusoft.gsqlparser.nodes.TTable;
008import gudusoft.gsqlparser.resolver2.ColumnLevel;
009import gudusoft.gsqlparser.resolver2.matcher.INameMatcher;
010import gudusoft.gsqlparser.resolver2.model.ColumnSource;
011import gudusoft.gsqlparser.stmt.TSelectSqlStatement;
012
013import java.util.ArrayList;
014import java.util.Collections;
015import java.util.HashSet;
016import java.util.LinkedHashMap;
017import java.util.List;
018import java.util.Map;
019import java.util.Set;
020
021/**
022 * Namespace representing a Common Table Expression (CTE).
023 * Similar to SubqueryNamespace but handles CTE-specific features:
024 * - Explicit column list: WITH cte(c1, c2) AS (SELECT ...)
025 * - Recursive CTEs
026 * - Multiple references within same query
027 * - UNION subqueries: columns are pushed through to all UNION branches
028 *
029 * Example:
030 * WITH my_cte(id, name) AS (
031 *   SELECT user_id, user_name FROM users
032 * )
033 * SELECT id, name FROM my_cte;
034 */
035public class CTENamespace extends AbstractNamespace {
036
037    private final TCTE cte;
038    private final String cteName;
039    private final TSelectSqlStatement selectStatement;
040
041    /** CTE column list (explicit column names) */
042    private final List<String> explicitColumns;
043
044    /** Whether this CTE is recursive */
045    private final boolean recursive;
046
047    /** UnionNamespace if this CTE's subquery is a UNION */
048    private UnionNamespace unionNamespace;
049
050    /** Inferred columns from star push-down */
051    private Map<String, ColumnSource> inferredColumns;
052
053    /** Track inferred column names */
054    private Set<String> inferredColumnNames;
055
056    /**
057     * The TTable that references this CTE in a FROM clause.
058     * Used as fallback for getFinalTable() when there's no underlying physical table.
059     * For example: WITH cte AS (SELECT 1 AS col) SELECT col FROM cte
060     * The referencing TTable is the 'cte' in the FROM clause.
061     */
062    private TTable referencingTable;
063
064    /**
065     * Namespaces for FROM clause tables that support dynamic inference.
066     * Used to propagate inferred columns through deeply nested structures.
067     * Lazily initialized when needed.
068     *
069     * Example: WITH cte AS (SELECT * FROM (SELECT * FROM t1 UNION ALL SELECT * FROM t2) sub)
070     * The 'sub' subquery namespace is stored here for propagation.
071     */
072    private List<INamespace> fromClauseNamespaces;
073
074    public CTENamespace(TCTE cte,
075                       String cteName,
076                       TSelectSqlStatement selectStatement,
077                       INameMatcher nameMatcher) {
078        super(cte, nameMatcher);
079        this.cte = cte;
080        this.cteName = cteName;
081        this.selectStatement = selectStatement;
082        this.explicitColumns = extractExplicitColumns(cte);
083        this.recursive = isRecursiveCTE(cte);
084
085        // If the CTE's subquery is a UNION, create a UnionNamespace to handle it
086        if (selectStatement != null && selectStatement.isCombinedQuery()) {
087            this.unionNamespace = new UnionNamespace(selectStatement, cteName + "_union", nameMatcher);
088        }
089    }
090
091    public CTENamespace(TCTE cte, String cteName, TSelectSqlStatement selectStatement) {
092        this(cte, cteName, selectStatement, null);
093    }
094
095    @Override
096    public String getDisplayName() {
097        return cteName;
098    }
099
100    /**
101     * Get the TTable that references this CTE in a FROM clause.
102     */
103    public TTable getReferencingTable() {
104        return referencingTable;
105    }
106
107    /**
108     * {@inheritDoc}
109     * For CTENamespace, returns the TTable that references this CTE in the query.
110     * This is the immediate source table for columns resolved through this CTE.
111     */
112    @Override
113    public TTable getSourceTable() {
114        return referencingTable;
115    }
116
117    /**
118     * Set the TTable that references this CTE in a FROM clause.
119     * Called by ScopeBuilder when a CTE is referenced.
120     */
121    public void setReferencingTable(TTable table) {
122        this.referencingTable = table;
123    }
124
125    @Override
126    public TTable getFinalTable() {
127        // Trace through the CTE's subquery to find the underlying physical table
128        // This is similar to SubqueryNamespace.getFinalTable() but handles CTE chains
129
130        // If this CTE has a UNION subquery, delegate to UnionNamespace
131        if (unionNamespace != null) {
132            TTable unionTable = unionNamespace.getFinalTable();
133            if (unionTable != null) {
134                return unionTable;
135            }
136            // Fallback to referencing table if UNION has no physical tables
137            return referencingTable;
138        }
139
140        // If no tables in the CTE's SELECT, return the referencing table
141        // This handles CTEs like: WITH cte AS (SELECT 1 AS col)
142        if (selectStatement == null || selectStatement.tables == null || selectStatement.tables.size() == 0) {
143            return referencingTable;
144        }
145
146        // Check for qualified star column (e.g., CTE_NAME.*) first
147        TTable qualifiedStarTable = findTableFromQualifiedStar();
148        if (qualifiedStarTable != null) {
149            return qualifiedStarTable;
150        }
151
152        // For single-table CTEs, trace to the underlying table
153        TTable firstTable = selectStatement.tables.getTable(0);
154        if (firstTable == null) {
155            return null;
156        }
157
158        // If it's a physical table (not a CTE reference), return it
159        if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !firstTable.isCTEName()) {
160            return firstTable;
161        }
162
163        // If it's a CTE reference, trace through the CTE chain
164        if (firstTable.isCTEName() && firstTable.getCTE() != null) {
165            return traceTableThroughCTE(firstTable.getCTE());
166        }
167
168        // If it's a subquery, trace through it
169        if (firstTable.getSubquery() != null) {
170            SubqueryNamespace nestedNs = new SubqueryNamespace(
171                firstTable.getSubquery(),
172                firstTable.getAliasName(),
173                nameMatcher
174            );
175            nestedNs.validate();
176            TTable subTable = nestedNs.getFinalTable();
177            if (subTable != null) {
178                return subTable;
179            }
180        }
181
182        // If it's a join, get the first base table
183        if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.join) {
184            TTable joinTable = findFirstPhysicalTableFromJoin(firstTable);
185            if (joinTable != null) {
186                return joinTable;
187            }
188        }
189
190        // Fallback: return the referencing TTable (the CTE reference in FROM clause)
191        // This is used when the CTE doesn't have underlying physical tables,
192        // e.g., WITH cte AS (SELECT 1 AS col) - the columns are literals, not from tables
193        return referencingTable;
194    }
195
196    /**
197     * Find the table referenced by a qualified star column in this CTE's SELECT list.
198     * Example: SELECT other_cte.* FROM other_cte -> traces to other_cte's underlying table
199     */
200    private TTable findTableFromQualifiedStar() {
201        if (selectStatement == null || selectStatement.getResultColumnList() == null) {
202            return null;
203        }
204
205        TResultColumnList selectList = selectStatement.getResultColumnList();
206        for (int i = 0; i < selectList.size(); i++) {
207            TResultColumn resultCol = selectList.getResultColumn(i);
208            if (resultCol == null) continue;
209
210            String colStr = resultCol.toString().trim();
211            // Check if it's a qualified star (contains . before *)
212            if (colStr.endsWith("*") && colStr.contains(".")) {
213                int dotIndex = colStr.lastIndexOf('.');
214                if (dotIndex > 0) {
215                    String tablePrefix = colStr.substring(0, dotIndex).trim();
216                    // Find the table with this alias or name
217                    TTable matchingTable = findTableByAliasOrName(tablePrefix);
218                    if (matchingTable != null) {
219                        // If the matching table is a CTE reference, trace through it
220                        if (matchingTable.isCTEName() && matchingTable.getCTE() != null) {
221                            return traceTableThroughCTE(matchingTable.getCTE());
222                        }
223                        // If it's a subquery, trace through it
224                        if (matchingTable.getSubquery() != null) {
225                            SubqueryNamespace nestedNs = new SubqueryNamespace(
226                                matchingTable.getSubquery(),
227                                matchingTable.getAliasName(),
228                                nameMatcher
229                            );
230                            nestedNs.validate();
231                            return nestedNs.getFinalTable();
232                        }
233                        // If it's a physical table, return it
234                        if (matchingTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !matchingTable.isCTEName()) {
235                            return matchingTable;
236                        }
237                    }
238                }
239            }
240        }
241        return null;
242    }
243
244    /**
245     * Find a table in the FROM clause by alias or name.
246     */
247    private TTable findTableByAliasOrName(String nameOrAlias) {
248        if (selectStatement == null || selectStatement.tables == null) {
249            return null;
250        }
251
252        for (int i = 0; i < selectStatement.tables.size(); i++) {
253            TTable table = selectStatement.tables.getTable(i);
254            if (table == null) continue;
255
256            // Check alias
257            String alias = table.getAliasName();
258            if (alias != null && nameMatcher.matches(alias, nameOrAlias)) {
259                return table;
260            }
261
262            // Check table name
263            if (table.getTableName() != null && nameMatcher.matches(table.getTableName().toString(), nameOrAlias)) {
264                return table;
265            }
266        }
267        return null;
268    }
269
270    /**
271     * Trace through a CTE to find its underlying physical table.
272     * This handles CTE chains like: CTE1 -> CTE2 -> CTE3 -> physical_table
273     */
274    private TTable traceTableThroughCTE(TCTE cteNode) {
275        return traceTableThroughCTE(cteNode, new HashSet<TCTE>());
276    }
277
278    private TTable traceTableThroughCTE(TCTE cteNode, java.util.Set<TCTE> visited) {
279        if (cteNode == null || cteNode.getSubquery() == null) {
280            return null;
281        }
282
283        // Detect circular CTE references
284        if (!visited.add(cteNode)) {
285            return null;
286        }
287
288        TSelectSqlStatement cteSubquery = cteNode.getSubquery();
289
290        // Handle UNION in the CTE
291        if (cteSubquery.isCombinedQuery()) {
292            // For UNION, trace the left branch
293            TSelectSqlStatement leftStmt = cteSubquery.getLeftStmt();
294            if (leftStmt != null && leftStmt.tables != null && leftStmt.tables.size() > 0) {
295                cteSubquery = leftStmt;
296            }
297        }
298
299        if (cteSubquery.tables == null || cteSubquery.tables.size() == 0) {
300            return null;
301        }
302
303        TTable firstTable = cteSubquery.tables.getTable(0);
304        if (firstTable == null) {
305            return null;
306        }
307
308        // If it's a physical table (not CTE), we found it
309        if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !firstTable.isCTEName()) {
310            return firstTable;
311        }
312
313        // If it's another CTE reference, continue tracing
314        if (firstTable.isCTEName() && firstTable.getCTE() != null) {
315            return traceTableThroughCTE(firstTable.getCTE(), visited);
316        }
317
318        // If it's a subquery, trace through it
319        if (firstTable.getSubquery() != null) {
320            SubqueryNamespace nestedNs = new SubqueryNamespace(
321                firstTable.getSubquery(),
322                firstTable.getAliasName(),
323                nameMatcher
324            );
325            nestedNs.validate();
326            return nestedNs.getFinalTable();
327        }
328
329        // If it's a join, get the first base table
330        if (firstTable.getTableType() == gudusoft.gsqlparser.ETableSource.join) {
331            return findFirstPhysicalTableFromJoin(firstTable);
332        }
333
334        return null;
335    }
336
337    /**
338     * Find the first physical table from a JOIN expression.
339     */
340    private TTable findFirstPhysicalTableFromJoin(TTable joinTable) {
341        if (joinTable == null || joinTable.getJoinExpr() == null) {
342            return null;
343        }
344
345        gudusoft.gsqlparser.nodes.TJoinExpr joinExpr = joinTable.getJoinExpr();
346
347        // Check left side first
348        TTable leftTable = joinExpr.getLeftTable();
349        if (leftTable != null) {
350            if (leftTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !leftTable.isCTEName()) {
351                return leftTable;
352            }
353            if (leftTable.isCTEName() && leftTable.getCTE() != null) {
354                TTable traced = traceTableThroughCTE(leftTable.getCTE());
355                if (traced != null) return traced;
356            }
357            if (leftTable.getSubquery() != null) {
358                SubqueryNamespace nestedNs = new SubqueryNamespace(
359                    leftTable.getSubquery(),
360                    leftTable.getAliasName(),
361                    nameMatcher
362                );
363                nestedNs.validate();
364                return nestedNs.getFinalTable();
365            }
366            if (leftTable.getTableType() == gudusoft.gsqlparser.ETableSource.join) {
367                return findFirstPhysicalTableFromJoin(leftTable);
368            }
369        }
370
371        // Check right side
372        TTable rightTable = joinExpr.getRightTable();
373        if (rightTable != null) {
374            if (rightTable.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !rightTable.isCTEName()) {
375                return rightTable;
376            }
377            if (rightTable.isCTEName() && rightTable.getCTE() != null) {
378                return traceTableThroughCTE(rightTable.getCTE());
379            }
380        }
381
382        return null;
383    }
384
385    @Override
386    public List<TTable> getAllFinalTables() {
387        // If this CTE has a UNION subquery, delegate to the UnionNamespace
388        if (unionNamespace != null) {
389            return unionNamespace.getAllFinalTables();
390        }
391
392        // Check if this CTE references another CTE (which might be a UNION)
393        if (selectStatement != null && selectStatement.tables != null && selectStatement.tables.size() > 0) {
394            TTable firstTable = selectStatement.tables.getTable(0);
395            if (firstTable != null && firstTable.isCTEName() && firstTable.getCTE() != null) {
396                TCTE referencedCTE = firstTable.getCTE();
397                if (referencedCTE.getSubquery() != null) {
398                    // Create a namespace for the referenced CTE to get its tables
399                    CTENamespace referencedNs = new CTENamespace(
400                        referencedCTE,
401                        referencedCTE.getTableName() != null ? referencedCTE.getTableName().toString() : "cte",
402                        referencedCTE.getSubquery(),
403                        nameMatcher
404                    );
405                    referencedNs.validate();
406                    // This will trace through the CTE chain to get all tables
407                    // including from UNION branches
408                    return referencedNs.getAllFinalTables();
409                }
410            }
411        }
412
413        // For non-UNION, non-CTE-reference CTEs, return the single final table
414        TTable finalTable = getFinalTable();
415        if (finalTable != null) {
416            return Collections.singletonList(finalTable);
417        }
418
419        return Collections.emptyList();
420    }
421
422    @Override
423    protected void doValidate() {
424        columnSources = new LinkedHashMap<>();
425
426        if (selectStatement == null || selectStatement.getResultColumnList() == null) {
427            return;
428        }
429
430        TResultColumnList selectList = selectStatement.getResultColumnList();
431
432        // If CTE has explicit column list, use it
433        if (!explicitColumns.isEmpty()) {
434            validateWithExplicitColumns(selectList);
435        } else {
436            // No explicit columns, derive from SELECT list
437            validateWithImplicitColumns(selectList);
438        }
439    }
440
441    /**
442     * Validate CTE with explicit column list.
443     * Example: WITH cte(c1, c2, c3) AS (SELECT a, b, c FROM t)
444     *
445     * Two cases are handled:
446     *
447     * 1. **Position-based (Snowflake pattern)**: CTE explicit column list + SELECT *
448     *    Example: WITH cte(c1, c2, c3) AS (SELECT * FROM Employees)
449     *    - c1/c2/c3 are positional aliases for star expansion
450     *    - Without metadata: c1 -> Employees.*, c2 -> Employees.*, c3 -> Employees.*
451     *    - With metadata: c1 -> Employees.<col_1>, c2 -> Employees.<col_2>, etc.
452     *
453     * 2. **Direct mapping**: CTE explicit column list + named columns
454     *    Example: WITH cte(c1, c2) AS (SELECT id, name FROM t)
455     *    - c1 -> t.id, c2 -> t.name (1:1 positional mapping)
456     *
457     * @see <a href="star_column_pushdown.md#cte-explicit-column-list--select--snowflake-case">
458     *      Documentation: CTE Explicit Column List + SELECT *</a>
459     */
460    private void validateWithExplicitColumns(TResultColumnList selectList) {
461        // Check if the SELECT list contains only star column(s) - the position-based pattern
462        StarColumnInfo starInfo = analyzeStarColumns(selectList);
463
464        if (starInfo.isSingleStar()) {
465            // Position-based case: CTE(c1,c2,c3) AS (SELECT * FROM t)
466            // The column names c1/c2/c3 are positional aliases, not real column names
467            handleExplicitColumnsWithStar(starInfo.getStarColumn(), starInfo.getStarQualifier());
468        } else {
469            // Direct mapping case: CTE(c1,c2) AS (SELECT id, name FROM t)
470            // Each explicit column maps to corresponding SELECT list item by position
471            handleExplicitColumnsWithDirectMapping(selectList);
472        }
473    }
474
475    /**
476     * Handle CTE explicit columns when SELECT list is a star.
477     * This is the position-based (Snowflake) pattern.
478     *
479     * @param starColumn the star column (* or table.*)
480     * @param starQualifier the table qualifier if qualified star (e.g., "src" for "src.*"), or null
481     */
482    private void handleExplicitColumnsWithStar(TResultColumn starColumn, String starQualifier) {
483        // Try ordinal mapping if metadata is available
484        List<String> ordinalColumns = tryOrdinalMapping(starQualifier);
485
486        if (ordinalColumns != null && ordinalColumns.size() >= explicitColumns.size()) {
487            // Metadata available - use ordinal mapping: c1 -> Employees.<col_1>
488            for (int i = 0; i < explicitColumns.size(); i++) {
489                String cteColName = explicitColumns.get(i);
490                String baseColName = ordinalColumns.get(i);
491
492                ColumnSource source = new ColumnSource(
493                    this,
494                    cteColName,
495                    starColumn,      // Reference to star column
496                    1.0,             // High confidence - ordinal mapping from metadata
497                    "cte_explicit_column_ordinal:" + baseColName
498                );
499                columnSources.put(cteColName, source);
500            }
501        } else {
502            // No metadata - fallback to star reference: c1 -> Employees.*
503            for (String colName : explicitColumns) {
504                ColumnSource source = new ColumnSource(
505                    this,
506                    colName,
507                    starColumn,      // Reference to star column
508                    0.8,             // Lower confidence - ordinal mapping unknown
509                    "cte_explicit_column_via_star"
510                );
511                columnSources.put(colName, source);
512            }
513        }
514    }
515
516    /**
517     * Handle CTE explicit columns with direct positional mapping to SELECT list.
518     *
519     * @param selectList the SELECT list to map from
520     */
521    private void handleExplicitColumnsWithDirectMapping(TResultColumnList selectList) {
522        int columnCount = Math.min(explicitColumns.size(), selectList.size());
523
524        for (int i = 0; i < columnCount; i++) {
525            String colName = explicitColumns.get(i);
526            TResultColumn resultCol = selectList.getResultColumn(i);
527
528            ColumnSource source = new ColumnSource(
529                this,
530                colName,
531                resultCol,
532                1.0,  // Definite - direct positional mapping
533                "cte_explicit_column"
534            );
535
536            columnSources.put(colName, source);
537        }
538    }
539
540    /**
541     * Try to get ordered column names from metadata for ordinal mapping.
542     *
543     * @param starQualifier the table qualifier (e.g., "src"), or null for unqualified star
544     * @return ordered list of column names from metadata, or null if not available
545     */
546    private List<String> tryOrdinalMapping(String starQualifier) {
547        // TODO: When metadata (TSQLEnv/DDL) is available, return ordered column list
548        // For now, return null to use the fallback (star reference)
549        //
550        // Future implementation:
551        // 1. Find the source table namespace by starQualifier
552        // 2. Get its column sources (which use LinkedHashMap for insertion order)
553        // 3. Return the column names in order
554        return null;
555    }
556
557    /**
558     * Analyze star columns in the SELECT list.
559     * Determines if the SELECT is a single star column pattern.
560     */
561    private StarColumnInfo analyzeStarColumns(TResultColumnList selectList) {
562        if (selectList == null || selectList.size() == 0) {
563            return new StarColumnInfo();
564        }
565
566        // Check for single star column pattern
567        if (selectList.size() == 1) {
568            TResultColumn rc = selectList.getResultColumn(0);
569            if (isStarColumn(rc)) {
570                String qualifier = getStarQualifier(rc);
571                return new StarColumnInfo(rc, qualifier);
572            }
573        }
574
575        return new StarColumnInfo();
576    }
577
578    /**
579     * Check if a result column is a star column (* or table.*)
580     */
581    private boolean isStarColumn(TResultColumn rc) {
582        if (rc == null) {
583            return false;
584        }
585        String str = rc.toString();
586        return str != null && (str.equals("*") || str.endsWith(".*"));
587    }
588
589    /**
590     * Get the qualifier from a qualified star (src.* returns "src")
591     */
592    private String getStarQualifier(TResultColumn rc) {
593        if (rc == null) {
594            return null;
595        }
596        String str = rc.toString();
597        if (str != null && str.endsWith(".*") && str.length() > 2) {
598            return str.substring(0, str.length() - 2);
599        }
600        return null;
601    }
602
603    /**
604     * Helper class to hold star column analysis results.
605     */
606    private static class StarColumnInfo {
607        private final TResultColumn starColumn;
608        private final String starQualifier;
609
610        StarColumnInfo() {
611            this.starColumn = null;
612            this.starQualifier = null;
613        }
614
615        StarColumnInfo(TResultColumn starColumn, String starQualifier) {
616            this.starColumn = starColumn;
617            this.starQualifier = starQualifier;
618        }
619
620        boolean isSingleStar() {
621            return starColumn != null;
622        }
623
624        TResultColumn getStarColumn() {
625            return starColumn;
626        }
627
628        String getStarQualifier() {
629            return starQualifier;
630        }
631    }
632
633    /**
634     * Validate CTE without explicit column list.
635     * Example: WITH cte AS (SELECT id, name FROM users)
636     */
637    private void validateWithImplicitColumns(TResultColumnList selectList) {
638        for (int i = 0; i < selectList.size(); i++) {
639            TResultColumn resultCol = selectList.getResultColumn(i);
640
641            // Determine column name
642            String colName = getColumnName(resultCol);
643            if (colName == null) {
644                colName = "col_" + (i + 1);
645            }
646
647            // Create column source
648            ColumnSource source = new ColumnSource(
649                this,
650                colName,
651                resultCol,
652                1.0,  // Definite - from SELECT list
653                "cte_implicit_column"
654            );
655
656            columnSources.put(colName, source);
657        }
658    }
659
660    /**
661     * Extract column name from TResultColumn
662     */
663    private String getColumnName(TResultColumn resultCol) {
664        // Check for alias
665        if (resultCol.getAliasClause() != null &&
666            resultCol.getAliasClause().getAliasName() != null) {
667            return resultCol.getAliasClause().getAliasName().toString();
668        }
669
670        // Check for simple column reference
671        if (resultCol.getExpr() != null) {
672            gudusoft.gsqlparser.nodes.TExpression expr = resultCol.getExpr();
673            if (expr.getExpressionType() == gudusoft.gsqlparser.EExpressionType.simple_object_name_t) {
674                TObjectName objName = expr.getObjectOperand();
675                if (objName != null) {
676                    return objName.getColumnNameOnly();
677                }
678            }
679        }
680
681        return null;
682    }
683
684    /**
685     * Extract explicit column list from CTE
686     */
687    private List<String> extractExplicitColumns(TCTE cte) {
688        List<String> columns = new ArrayList<>();
689
690        if (cte != null && cte.getColumnList() != null) {
691            for (int i = 0; i < cte.getColumnList().size(); i++) {
692                TObjectName colName = cte.getColumnList().getObjectName(i);
693                if (colName != null) {
694                    columns.add(colName.toString());
695                }
696            }
697        }
698
699        return columns;
700    }
701
702    /**
703     * Check if this is a recursive CTE
704     */
705    private boolean isRecursiveCTE(TCTE cte) {
706        if (cte == null) {
707            return false;
708        }
709        return cte.isRecursive();
710    }
711
712    public TCTE getCTE() {
713        return cte;
714    }
715
716    @Override
717    public TSelectSqlStatement getSelectStatement() {
718        return selectStatement;
719    }
720
721    @Override
722    public boolean hasStarColumn() {
723        // If this CTE has a UNION subquery, delegate to the UnionNamespace
724        if (unionNamespace != null) {
725            return unionNamespace.hasStarColumn();
726        }
727
728        if (selectStatement == null || selectStatement.getResultColumnList() == null) {
729            return false;
730        }
731
732        TResultColumnList selectList = selectStatement.getResultColumnList();
733        for (int i = 0; i < selectList.size(); i++) {
734            TResultColumn resultCol = selectList.getResultColumn(i);
735            if (resultCol != null && resultCol.toString().endsWith("*")) {
736                return true;
737            }
738        }
739        return false;
740    }
741
742    @Override
743    public boolean supportsDynamicInference() {
744        return hasStarColumn();
745    }
746
747    /**
748     * Slice S4 (plan §5.5): a CTE's derived schema is authoritative once the
749     * CTE has validated and produced at least one named column source. CTEs
750     * with explicit column lists are always FOUND once validated; CTEs that
751     * simply propagate {@code SELECT *} produce derived columns from the
752     * underlying namespace and only become FOUND when those columns are
753     * known. Recursive CTEs and unresolved cyclic references stay
754     * METADATA_UNAVAILABLE — S9 will refine this contract.
755     */
756    @Override
757    public MetadataState getMetadataState() {
758        ensureValidated();
759        if (columnSources != null && !columnSources.isEmpty()) {
760            return MetadataState.FOUND;
761        }
762        return MetadataState.METADATA_UNAVAILABLE;
763    }
764
765    @Override
766    public boolean addInferredColumn(String columnName, double confidence, String evidence) {
767        if (columnName == null || columnName.isEmpty()) {
768            return false;
769        }
770
771        // Initialize maps if needed
772        if (inferredColumns == null) {
773            inferredColumns = new LinkedHashMap<>();
774        }
775        if (inferredColumnNames == null) {
776            inferredColumnNames = new HashSet<>();
777        }
778
779        // Slice S1: dedupe via matcher-aware helper so per-vendor identifier
780        // rules govern collision detection. Without this, BigQuery / MySQL
781        // (case-insensitive columns) accept "MyCol" and "MYCOL" as two entries
782        // and downstream lookups become non-deterministic. Codex round 2:
783        // the storage key is the raw (exposedName) form so two matcher-distinct
784        // identifiers that happen to normalize equally (e.g. Postgres quoted
785        // "mycol" vs unquoted MYCOL) keep separate entries.
786        if (containsColumnByMatcher(columnSources, columnName)) {
787            return false;
788        }
789        if (containsColumnByMatcher(inferredColumns, columnName)) {
790            return false;
791        }
792
793        // Collect candidate tables - get ALL final tables from the CTE chain
794        // This handles both UNION CTEs and CTEs that reference other CTEs
795        java.util.List<TTable> candidateTables = new java.util.ArrayList<>();
796
797        // Get all final tables from this CTE's namespace (handles UNION and CTE chains)
798        java.util.List<TTable> allTables = this.getAllFinalTables();
799        for (TTable table : allTables) {
800            if (table != null && !candidateTables.contains(table)) {
801                candidateTables.add(table);
802            }
803        }
804
805        // Create inferred column source WITH candidate tables if applicable
806        ColumnSource source = new ColumnSource(
807            this,
808            columnName,
809            null,
810            confidence,
811            evidence,
812            null,  // overrideTable
813            (candidateTables != null && !candidateTables.isEmpty()) ? candidateTables : null
814        );
815
816        inferredColumns.put(columnName, source);
817        inferredColumnNames.add(columnName);
818
819        // Propagate to nested namespaces if this CTE has SELECT * from subqueries/unions
820        propagateToNestedNamespaces(columnName, confidence, evidence);
821
822        // NOTE: Propagation to referenced CTEs (CTE chains like cte2 -> cte1) is handled
823        // by NamespaceEnhancer.propagateThroughCTEChains() which has access to the actual
824        // namespace instances from the scope tree. We don't do it here because creating
825        // new CTENamespace instances would not affect the actual instances used for resolution.
826
827        return true;
828    }
829
830    /**
831     * Propagate an inferred column to nested namespaces.
832     *
833     * This is a unified algorithm that handles:
834     * 1. Direct UNION subqueries (CTE body is a UNION)
835     * 2. SELECT * FROM (UNION) patterns
836     * 3. SELECT * FROM (subquery) patterns
837     * 4. Deeply nested structures with JOINs
838     *
839     * The propagation is recursive - each namespace that receives the column
840     * will further propagate to its own nested namespaces.
841     *
842     * @param columnName The column name to propagate
843     * @param confidence Confidence score
844     * @param evidence Evidence string for debugging
845     */
846    private void propagateToNestedNamespaces(String columnName, double confidence, String evidence) {
847        // Case 1: Direct UNION subquery (CTE body is a UNION)
848        if (unionNamespace != null) {
849            if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
850                System.out.println("[CTENamespace] Propagating '" + columnName + "' to direct unionNamespace in " + cteName);
851            }
852            unionNamespace.addInferredColumn(columnName, confidence, evidence + "_cte_union_propagate");
853            return;
854        }
855
856        // Case 2: CTE has SELECT * from nested structures (subqueries, unions in FROM clause)
857        // Only propagate if the CTE's SELECT list contains a star column
858        if (!hasStarColumn()) {
859            if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
860                System.out.println("[CTENamespace] No star column in " + cteName + ", skipping FROM clause propagation");
861            }
862            return;
863        }
864
865        // Get or create namespaces for FROM clause tables
866        List<INamespace> fromNamespaces = getOrCreateFromClauseNamespaces();
867        if (fromNamespaces.isEmpty()) {
868            if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
869                System.out.println("[CTENamespace] No FROM clause namespaces with dynamic inference in " + cteName);
870            }
871            return;
872        }
873
874        // Propagate to each FROM clause namespace
875        for (INamespace ns : fromNamespaces) {
876            if (ns.supportsDynamicInference()) {
877                if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
878                    System.out.println("[CTENamespace] Propagating '" + columnName + "' to FROM clause namespace " +
879                        ns.getDisplayName() + " in " + cteName);
880                }
881                // The nested namespace's addInferredColumn will recursively propagate further
882                ns.addInferredColumn(columnName, confidence, evidence + "_cte_from_propagate");
883            }
884        }
885    }
886
887    @Override
888    public Set<String> getInferredColumns() {
889        if (inferredColumnNames == null) {
890            return Collections.emptySet();
891        }
892        return Collections.unmodifiableSet(inferredColumnNames);
893    }
894
895    /**
896     * Binding-diagnostic view of the CTE output schema.
897     *
898     * <p>This deliberately differs from {@link #hasColumn(String)}, which has
899     * legacy compatibility fallbacks that may infer hidden base-table columns
900     * for lineage. Binding diagnostics need the SQL-visible CTE projection:
901     * explicit CTE column names, named projection columns, and star/pushdown
902     * inferred output columns are visible; hidden base-table inferences are
903     * not.</p>
904     */
905    public ColumnLevel hasAuthoritativeOutputColumn(String columnName) {
906        ensureValidated();
907
908        if (columnName == null || columnName.isEmpty()) {
909            return ColumnLevel.MAYBE;
910        }
911
912        if (containsColumnByMatcher(columnSources, columnName)) {
913            return ColumnLevel.EXISTS;
914        }
915
916        if (!explicitColumns.isEmpty()) {
917            return ColumnLevel.NOT_EXISTS;
918        }
919
920        if (containsAuthoritativeInferredColumn(columnName)) {
921            return ColumnLevel.EXISTS;
922        }
923
924        if (hasStarColumn()) {
925            return ColumnLevel.MAYBE;
926        }
927
928        if (columnSources != null && !columnSources.isEmpty()) {
929            return ColumnLevel.NOT_EXISTS;
930        }
931
932        return ColumnLevel.MAYBE;
933    }
934
935    /**
936     * Diagnostic-only sibling of the matcher-aware contains check that
937     * additionally filters out the legacy {@code inferred_from_cte_base_table}
938     * entries (which represent hidden base-table columns, not the CTE's
939     * SQL-visible projection). Matches must still respect per-vendor identifier
940     * rules so quoted Oracle / Postgres identifiers don't get folded.
941     */
942    private boolean containsAuthoritativeInferredColumn(String columnName) {
943        if (inferredColumns == null || inferredColumns.isEmpty()) {
944            return false;
945        }
946        for (ColumnSource candidate : inferredColumns.values()) {
947            // Codex round 1: compare against the exposedName (original case,
948            // quotes preserved) instead of the normalized key, so quoted
949            // vs unquoted distinctions are honored on Oracle / Postgres /
950            // Snowflake / Hive / Teradata.
951            String exposed = candidate != null ? candidate.getExposedName() : null;
952            if (exposed == null || !nameMatcher.matches(exposed, columnName)) {
953                continue;
954            }
955            String evidence = candidate.getEvidence();
956            if ("inferred_from_cte_base_table".equals(evidence)) {
957                continue;
958            }
959            return true;
960        }
961        return false;
962    }
963
964    @Override
965    public ColumnLevel hasColumn(String columnName) {
966        ensureValidated();
967
968        // Check in explicit columns
969        if (containsColumnByMatcher(columnSources, columnName)) {
970            return ColumnLevel.EXISTS;
971        }
972
973        // Check in inferred columns. The map is raw-keyed (= ColumnSource.
974        // exposedName); the matcher-aware helper applies per-dialect rules
975        // including SQL Server COLLATION_BASED. Round 2 reverted from
976        // normalized keys because two matcher-distinct identifiers can
977        // normalize to the same key (Postgres "mycol" vs MYCOL).
978        if (containsColumnByMatcher(inferredColumns, columnName)) {
979            return ColumnLevel.EXISTS;
980        }
981
982        // If has star column, unknown columns MAYBE exist
983        if (hasStarColumn()) {
984            return ColumnLevel.MAYBE;
985        }
986
987        // If the CTE has explicit column definitions like "cte(c1, c2, c3)", then ONLY
988        // those columns exist - don't return MAYBE for other columns.
989        // This prevents ambiguous resolution when a CTE with explicit columns is joined
990        // with another table.
991        if (!explicitColumns.isEmpty()) {
992            return ColumnLevel.NOT_EXISTS;
993        }
994
995        // For CTEs without explicit columns AND without star columns, check if underlying
996        // tables might have the column. This handles cases like referencing columns from
997        // the CTE's base tables that aren't explicitly selected in the CTE's SELECT list.
998        if (selectStatement != null && selectStatement.tables != null) {
999            for (int i = 0; i < selectStatement.tables.size(); i++) {
1000                TTable table = selectStatement.tables.getTable(i);
1001                if (table != null && table.getTableType() == gudusoft.gsqlparser.ETableSource.objectname) {
1002                    // The CTE has a base table - column might exist there
1003                    return ColumnLevel.MAYBE;
1004                }
1005            }
1006        }
1007
1008        return ColumnLevel.NOT_EXISTS;
1009    }
1010
1011    @Override
1012    public ColumnSource resolveColumn(String columnName) {
1013        ensureValidated();
1014
1015        // First check explicit columns
1016        ColumnSource source = super.resolveColumn(columnName);
1017        if (source != null) {
1018            return source;
1019        }
1020
1021        // Then check inferred columns. Slice S1 + codex round 2: the map is
1022        // raw-keyed (= ColumnSource.exposedName), so the exact-match probe
1023        // is O(1) for the same identifier queried again. For case-only-
1024        // different references, the matcher loop walks values via
1025        // getExposedName() so quote state is preserved on quoted-sensitive
1026        // dialects (Oracle / Postgres / Snowflake / Hive / Teradata).
1027        if (inferredColumns != null) {
1028            ColumnSource exact = inferredColumns.get(columnName);
1029            if (exact != null) {
1030                return exact;
1031            }
1032            for (ColumnSource entry : inferredColumns.values()) {
1033                String exposed = entry != null ? entry.getExposedName() : null;
1034                if (exposed != null && nameMatcher.matches(exposed, columnName)) {
1035                    return entry;
1036                }
1037            }
1038        }
1039
1040        // If has star column, auto-infer this column
1041        if (hasStarColumn()) {
1042            boolean added = addInferredColumn(columnName, 0.8, "auto_inferred_from_reference");
1043            if (added && inferredColumns != null) {
1044                return inferredColumns.get(columnName);
1045            }
1046        }
1047
1048        // For CTEs without star columns, check if underlying base tables might have the column.
1049        // This handles references to columns that aren't explicitly selected in the CTE's SELECT list.
1050        if (selectStatement != null && selectStatement.tables != null) {
1051            for (int i = 0; i < selectStatement.tables.size(); i++) {
1052                TTable table = selectStatement.tables.getTable(i);
1053                if (table != null && table.getTableType() == gudusoft.gsqlparser.ETableSource.objectname) {
1054                    // Create an inferred column source that traces to the base table
1055                    boolean added = addInferredColumn(columnName, 0.6, "inferred_from_cte_base_table");
1056                    if (added && inferredColumns != null) {
1057                        return inferredColumns.get(columnName);
1058                    }
1059                    break;
1060                }
1061            }
1062        }
1063
1064        return null;
1065    }
1066
1067    /**
1068     * Get the UnionNamespace if this CTE's subquery is a UNION.
1069     */
1070    public UnionNamespace getUnionNamespace() {
1071        return unionNamespace;
1072    }
1073
1074    /**
1075     * Get or create namespaces for FROM clause tables that support dynamic inference.
1076     * This handles cases like: WITH cte AS (SELECT * FROM (UNION) sub)
1077     * where the CTE body is not directly a UNION but contains a subquery with UNION.
1078     *
1079     * The namespaces are lazily created and cached for reuse.
1080     *
1081     * @return List of namespaces that support dynamic inference (may be empty)
1082     */
1083    private List<INamespace> getOrCreateFromClauseNamespaces() {
1084        if (fromClauseNamespaces != null) {
1085            return fromClauseNamespaces;
1086        }
1087
1088        fromClauseNamespaces = new ArrayList<>();
1089
1090        if (selectStatement == null || selectStatement.tables == null) {
1091            return fromClauseNamespaces;
1092        }
1093
1094        // Iterate through FROM clause tables and create namespaces for those that
1095        // could have star columns (subqueries, unions, CTE references)
1096        for (int i = 0; i < selectStatement.tables.size(); i++) {
1097            TTable table = selectStatement.tables.getTable(i);
1098            if (table == null) continue;
1099
1100            INamespace ns = createNamespaceForTable(table);
1101            if (ns != null && ns.supportsDynamicInference()) {
1102                fromClauseNamespaces.add(ns);
1103            }
1104        }
1105
1106        return fromClauseNamespaces;
1107    }
1108
1109    /**
1110     * Create an appropriate namespace for a table in the FROM clause.
1111     * Handles subqueries (including UNION), CTE references, and joins recursively.
1112     *
1113     * @param table The table from the FROM clause
1114     * @return INamespace for the table, or null if not applicable
1115     */
1116    private INamespace createNamespaceForTable(TTable table) {
1117        if (table == null) return null;
1118
1119        // Handle subquery tables
1120        if (table.getSubquery() != null) {
1121            TSelectSqlStatement subquery = table.getSubquery();
1122            String alias = table.getAliasName();
1123
1124            // Check if subquery is a UNION/INTERSECT/EXCEPT
1125            if (subquery.isCombinedQuery()) {
1126                UnionNamespace unionNs = new UnionNamespace(subquery, alias, nameMatcher);
1127                return unionNs;
1128            } else {
1129                // Regular subquery - create SubqueryNamespace
1130                SubqueryNamespace subNs = new SubqueryNamespace(subquery, alias, nameMatcher);
1131                subNs.validate();
1132                return subNs;
1133            }
1134        }
1135
1136        // Handle CTE references - these are handled by NamespaceEnhancer.propagateThroughCTEChains()
1137        // We don't create new CTENamespace here because we need the actual instances from scope tree
1138
1139        // Handle JOIN tables - recursively collect from join expressions
1140        if (table.getTableType() == gudusoft.gsqlparser.ETableSource.join) {
1141            return createNamespaceForJoin(table);
1142        }
1143
1144        return null;
1145    }
1146
1147    /**
1148     * Create namespaces for tables within a JOIN expression.
1149     * Returns a composite namespace that wraps all namespaces from the join.
1150     *
1151     * @param joinTable The JOIN table
1152     * @return INamespace that wraps join namespaces, or null
1153     */
1154    private INamespace createNamespaceForJoin(TTable joinTable) {
1155        if (joinTable == null || joinTable.getJoinExpr() == null) {
1156            return null;
1157        }
1158
1159        gudusoft.gsqlparser.nodes.TJoinExpr joinExpr = joinTable.getJoinExpr();
1160
1161        // Collect namespaces from both sides of the join
1162        List<INamespace> joinNamespaces = new ArrayList<>();
1163
1164        // Left side
1165        TTable leftTable = joinExpr.getLeftTable();
1166        if (leftTable != null) {
1167            INamespace leftNs = createNamespaceForTable(leftTable);
1168            if (leftNs != null && leftNs.supportsDynamicInference()) {
1169                joinNamespaces.add(leftNs);
1170            }
1171        }
1172
1173        // Right side
1174        TTable rightTable = joinExpr.getRightTable();
1175        if (rightTable != null) {
1176            INamespace rightNs = createNamespaceForTable(rightTable);
1177            if (rightNs != null && rightNs.supportsDynamicInference()) {
1178                joinNamespaces.add(rightNs);
1179            }
1180        }
1181
1182        // If we found namespaces, add them to fromClauseNamespaces directly
1183        // (we don't create a composite namespace, just add the individual ones)
1184        if (!joinNamespaces.isEmpty()) {
1185            fromClauseNamespaces.addAll(joinNamespaces);
1186        }
1187
1188        return null; // Individual namespaces added directly to fromClauseNamespaces
1189    }
1190
1191    public List<String> getExplicitColumns() {
1192        return new ArrayList<>(explicitColumns);
1193    }
1194
1195    public boolean isRecursive() {
1196        return recursive;
1197    }
1198
1199    @Override
1200    public String toString() {
1201        return String.format("CTENamespace(%s, columns=%d, recursive=%s)",
1202            cteName,
1203            columnSources != null ? columnSources.size() : explicitColumns.size(),
1204            recursive
1205        );
1206    }
1207}