001package gudusoft.gsqlparser.resolver2.model;
002
003import gudusoft.gsqlparser.EExpressionType;
004import gudusoft.gsqlparser.nodes.TExpression;
005import gudusoft.gsqlparser.nodes.TObjectName;
006import gudusoft.gsqlparser.nodes.TParseTreeNode;
007import gudusoft.gsqlparser.nodes.TResultColumn;
008import gudusoft.gsqlparser.nodes.TTable;
009import gudusoft.gsqlparser.resolver2.inference.EvidenceType;
010import gudusoft.gsqlparser.resolver2.namespace.INamespace;
011import gudusoft.gsqlparser.resolver2.namespace.SubqueryNamespace;
012import gudusoft.gsqlparser.resolver2.namespace.CTENamespace;
013import gudusoft.gsqlparser.resolver2.namespace.UnionNamespace;
014import gudusoft.gsqlparser.stmt.TSelectSqlStatement;
015
016import java.util.Collections;
017import java.util.List;
018
019/**
020 * Represents the source of a column reference.
021 * Tracks where a column comes from, including intermediate transformations
022 * through subqueries and CTEs.
023 *
024 * Design principles:
025 * 1. Immutable - once created, cannot be modified
026 * 2. Recursive - can trace back through subquery/CTE layers
027 * 3. Confidence-scored - supports evidence-based inference
028 */
029public class ColumnSource {
030    /** The namespace where this column is exposed (e.g., subquery, table) */
031    private final INamespace sourceNamespace;
032
033    /** The name by which this column is exposed in the namespace */
034    private final String exposedName;
035
036    /** The AST node where this column is defined (TResultColumn, TTableColumn, etc.) */
037    private final TParseTreeNode definitionNode;
038
039    /** Location information for the definition */
040    private final SourceLocation definitionLocation;
041
042    /**
043     * Confidence score [0.0, 1.0]:
044     * - 1.0: Definite (from metadata or explicit definition)
045     * - 0.7-0.9: High confidence inference (strong evidence)
046     * - 0.5-0.7: Medium confidence inference (some evidence)
047     * - 0.0-0.5: Low confidence guess
048     */
049    private final double confidence;
050
051    /**
052     * Evidence that supports this resolution.
053     * Used for debugging and explaining inference decisions.
054     *
055     * @deprecated Use {@link #evidenceDetail} instead. This field is kept for backward
056     *             compatibility and will be derived from evidenceDetail if not explicitly set.
057     */
058    private final String evidence;
059
060    /**
061     * Structured evidence detail for this resolution.
062     * Provides type-safe evidence with confidence weight and source traceability.
063     * This is the preferred way to access resolution evidence.
064     *
065     * @see ResolutionEvidence
066     */
067    private final ResolutionEvidence evidenceDetail;
068
069    /**
070     * Override table for traced columns.
071     * When set, getFinalTable() returns this instead of namespace's table.
072     */
073    private final TTable overrideTable;
074
075    /**
076     * Candidate tables for ambiguous columns.
077     * When a column could come from multiple tables (e.g., SELECT * FROM t1, t2),
078     * this list contains all possible source tables so end users can access them.
079     */
080    private final List<TTable> candidateTables;
081
082    /**
083     * Field path for deep/record field access (e.g., struct.field.subfield).
084     *
085     * <p>When a column reference includes field access beyond the base column,
086     * this captures the field path. For example, in {@code customer.address.city},
087     * if base column is {@code customer}, fieldPath contains {@code ["address", "city"]}.</p>
088     *
089     * <p>This field is null or empty for regular column references without field access.</p>
090     *
091     * @see FieldPath
092     */
093    private final FieldPath fieldPath;
094
095    public ColumnSource(INamespace sourceNamespace,
096                       String exposedName,
097                       TParseTreeNode definitionNode,
098                       double confidence,
099                       String evidence) {
100        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, null, null);
101    }
102
103    public ColumnSource(INamespace sourceNamespace,
104                       String exposedName,
105                       TParseTreeNode definitionNode,
106                       double confidence,
107                       String evidence,
108                       TTable overrideTable) {
109        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, null);
110    }
111
112    public ColumnSource(INamespace sourceNamespace,
113                       String exposedName,
114                       TParseTreeNode definitionNode,
115                       double confidence,
116                       String evidence,
117                       TTable overrideTable,
118                       List<TTable> candidateTables) {
119        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, candidateTables, null, null);
120    }
121
122    /**
123     * Full constructor with all fields including ResolutionEvidence.
124     */
125    public ColumnSource(INamespace sourceNamespace,
126                       String exposedName,
127                       TParseTreeNode definitionNode,
128                       double confidence,
129                       String evidence,
130                       TTable overrideTable,
131                       List<TTable> candidateTables,
132                       ResolutionEvidence evidenceDetail) {
133        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, candidateTables, evidenceDetail, null);
134    }
135
136    /**
137     * Full constructor with all fields including ResolutionEvidence and FieldPath.
138     *
139     * @param sourceNamespace The namespace where this column is exposed
140     * @param exposedName The name by which this column is exposed
141     * @param definitionNode The AST node where this column is defined
142     * @param confidence Confidence score [0.0, 1.0]
143     * @param evidence Evidence string for this resolution
144     * @param overrideTable Override table for traced columns
145     * @param candidateTables Candidate tables for ambiguous columns
146     * @param evidenceDetail Structured evidence detail
147     * @param fieldPath Field path for deep/record field access
148     */
149    public ColumnSource(INamespace sourceNamespace,
150                       String exposedName,
151                       TParseTreeNode definitionNode,
152                       double confidence,
153                       String evidence,
154                       TTable overrideTable,
155                       List<TTable> candidateTables,
156                       ResolutionEvidence evidenceDetail,
157                       FieldPath fieldPath) {
158        this.sourceNamespace = sourceNamespace;
159        this.exposedName = exposedName;
160        this.definitionNode = definitionNode;
161        this.definitionLocation = definitionNode != null
162            ? new SourceLocation(definitionNode)
163            : null;
164        this.confidence = Math.max(0.0, Math.min(1.0, confidence));
165        this.evidence = evidence;
166        this.overrideTable = overrideTable;
167        this.candidateTables = candidateTables != null ? Collections.unmodifiableList(candidateTables) : null;
168        this.fieldPath = fieldPath;
169        // If evidenceDetail not provided, create from legacy evidence
170        if (evidenceDetail != null) {
171            this.evidenceDetail = evidenceDetail;
172        } else if (evidence != null) {
173            this.evidenceDetail = ResolutionEvidence.fromLegacyEvidence(evidence, confidence, definitionNode);
174        } else {
175            this.evidenceDetail = null;
176        }
177    }
178
179    /**
180     * Constructor with ResolutionEvidence (preferred for new code).
181     */
182    public ColumnSource(INamespace sourceNamespace,
183                       String exposedName,
184                       TParseTreeNode definitionNode,
185                       ResolutionEvidence evidenceDetail) {
186        this(sourceNamespace, exposedName, definitionNode,
187             evidenceDetail != null ? evidenceDetail.getWeight() : 1.0,
188             evidenceDetail != null ? evidenceDetail.toLegacyEvidence() : "metadata",
189             null, null, evidenceDetail);
190    }
191
192    /**
193     * Constructor with ResolutionEvidence and override table.
194     */
195    public ColumnSource(INamespace sourceNamespace,
196                       String exposedName,
197                       TParseTreeNode definitionNode,
198                       ResolutionEvidence evidenceDetail,
199                       TTable overrideTable) {
200        this(sourceNamespace, exposedName, definitionNode,
201             evidenceDetail != null ? evidenceDetail.getWeight() : 1.0,
202             evidenceDetail != null ? evidenceDetail.toLegacyEvidence() : "metadata",
203             overrideTable, null, evidenceDetail);
204    }
205
206    /**
207     * Constructor for definite matches (confidence = 1.0)
208     */
209    public ColumnSource(INamespace sourceNamespace,
210                       String exposedName,
211                       TParseTreeNode definitionNode) {
212        this(sourceNamespace, exposedName, definitionNode, 1.0, "metadata");
213    }
214
215    public INamespace getSourceNamespace() {
216        return sourceNamespace;
217    }
218
219    public String getExposedName() {
220        return exposedName;
221    }
222
223    public TParseTreeNode getDefinitionNode() {
224        return definitionNode;
225    }
226
227    public SourceLocation getDefinitionLocation() {
228        return definitionLocation;
229    }
230
231    public double getConfidence() {
232        return confidence;
233    }
234
235    public String getEvidence() {
236        return evidence;
237    }
238
239    /**
240     * Get the structured evidence detail for this resolution.
241     *
242     * <p>This is the preferred way to access resolution evidence as it provides:
243     * <ul>
244     *   <li>Type-safe evidence type (enum)</li>
245     *   <li>Confidence weight with clear semantics</li>
246     *   <li>Source location for traceability</li>
247     *   <li>Human-readable messages</li>
248     * </ul>
249     *
250     * @return The structured evidence detail, or null if not available
251     */
252    public ResolutionEvidence getEvidenceDetail() {
253        return evidenceDetail;
254    }
255
256    /**
257     * Get the evidence type from the structured evidence detail.
258     * Convenience method for common use cases.
259     *
260     * @return The evidence type, or null if no evidence detail
261     */
262    public EvidenceType getEvidenceType() {
263        return evidenceDetail != null ? evidenceDetail.getType() : null;
264    }
265
266    /**
267     * Check if this resolution has definite evidence (not inferred).
268     * Definite evidence comes from DDL, metadata, or explicit definitions.
269     *
270     * @return true if evidence is definite
271     */
272    public boolean hasDefiniteEvidence() {
273        if (evidenceDetail != null) {
274            return evidenceDetail.isDefinite();
275        }
276        // Fallback: check legacy evidence and confidence
277        if (confidence >= 1.0) {
278            return true;
279        }
280        if (evidence != null) {
281            String lower = evidence.toLowerCase();
282            return lower.contains("metadata") || lower.contains("ddl") ||
283                   lower.contains("explicit") || lower.contains("insert_column");
284        }
285        return false;
286    }
287
288    /**
289     * Get the <b>final</b> physical table this column originates from after tracing
290     * through all subqueries and CTEs.
291     *
292     * <h3>Semantic Difference: getFinalTable() vs TObjectName.getSourceTable()</h3>
293     * <ul>
294     *   <li><b>getFinalTable()</b> (this method): The final physical table after
295     *       recursively tracing through all subqueries and CTEs. Use this for data lineage.</li>
296     *   <li><b>TObjectName.getSourceTable()</b>: The immediate source in the current scope.
297     *       For a column from a subquery, this points to the subquery's TTable itself.</li>
298     * </ul>
299     *
300     * <h3>Example</h3>
301     * <pre>{@code
302     * SELECT title FROM (SELECT * FROM books) sub
303     *
304     * For the 'title' column in outer SELECT:
305     * - TObjectName.getSourceTable() → TTable for subquery 'sub' (immediate source)
306     * - ColumnSource.getFinalTable() → TTable for 'books' (final physical table)
307     * }</pre>
308     *
309     * <p>For calculated columns in subqueries (expressions like {@code START_DT - x AS alias}),
310     * this returns null because such calculated columns don't originate from a physical
311     * table - they are derived values computed in the subquery.</p>
312     *
313     * <p>Note: For CTEs, calculated columns ARE the CTE's own columns, so they trace
314     * to the CTE itself (handled by CTENamespace.getFinalTable()).</p>
315     *
316     * @return The physical table, or null if unable to determine or if calculated in subquery
317     * @see gudusoft.gsqlparser.nodes.TObjectName#getSourceTable()
318     */
319    public TTable getFinalTable() {
320        if (sourceNamespace == null && overrideTable == null) {
321            return null;
322        }
323
324        // For SubqueryNamespace: calculated columns should NOT trace to base table
325        // They are derived values that don't exist in the underlying physical table
326        // Example: SELECT *, expr AS alias FROM table - alias is calculated, not from table
327        //
328        // IMPORTANT: Check BEFORE overrideTable to prevent alias/calculated columns
329        // from being traced to base tables even when overrideTable is explicitly set
330        if (sourceNamespace instanceof SubqueryNamespace && isCalculatedColumn()) {
331            return null;
332        }
333
334        // For CTENamespace: calculated columns ARE the CTE's own columns
335        // They should trace to the CTE itself (referencing table), NOT to underlying base tables
336        // Example: WITH cte AS (SELECT SUM(x) AS total FROM t) SELECT total FROM cte
337        // The 'total' column traces to 'cte', not to 't'
338        if (sourceNamespace instanceof CTENamespace && isCalculatedColumn()) {
339            return ((CTENamespace) sourceNamespace).getReferencingTable();
340        }
341
342        // For SubqueryNamespace: column aliases should NOT trace to base table
343        // The alias name doesn't exist in the base table's schema
344        // Example: SELECT col AS alias FROM table - alias should not trace to table.alias
345        if (sourceNamespace instanceof SubqueryNamespace && isColumnAlias()) {
346            return null;
347        }
348
349        // For CTENamespace: column aliases ARE the CTE's own columns
350        // They should trace to the CTE itself, NOT to underlying base tables
351        // Example: WITH cte AS (SELECT x AS y FROM t) SELECT y FROM cte
352        // The 'y' column traces to 'cte', not to 't'
353        if (sourceNamespace instanceof CTENamespace && isColumnAlias()) {
354            return ((CTENamespace) sourceNamespace).getReferencingTable();
355        }
356
357        // For CTENamespace: explicit column names ARE the CTE's own columns
358        // They should trace to the CTE itself, NOT to underlying base tables
359        // Example: WITH cte(c1, c2) AS (SELECT id, name FROM t) SELECT c1 FROM cte
360        // The 'c1' column traces to 'cte', not to 't' (because 'c1' doesn't exist in 't')
361        if (sourceNamespace instanceof CTENamespace && isCTEExplicitColumn()) {
362            return ((CTENamespace) sourceNamespace).getReferencingTable();
363        }
364
365        // For SubqueryNamespace: passthrough columns that reference aliases should NOT trace
366        // Example: SELECT stat_typ FROM (SELECT stat_typ = stellplatz_typ FROM t) AS b
367        // The stat_typ in outer query references b.stat_typ which is an alias
368        if (sourceNamespace instanceof SubqueryNamespace && isPassthroughToAlias()) {
369            return null;
370        }
371
372        // For SubqueryNamespace: passthrough columns that reference calculated columns should NOT trace
373        // Example: SELECT kko_lfz_9 FROM (SELECT CASE...END AS kko_lfz_9 FROM t) subq
374        // The outer kko_lfz_9 references a calculated column in the subquery
375        if (sourceNamespace instanceof SubqueryNamespace && isPassthroughToCalculatedInSubquery()) {
376            return null;
377        }
378
379        // For CTENamespace: passthrough columns that reference calculated subquery columns should NOT trace
380        // Example: WITH DataCTE AS (SELECT subq.calc_col FROM (SELECT CASE...END AS calc_col FROM t) subq)
381        // The CTE column calc_col references a calculated column in the subquery
382        if (sourceNamespace instanceof CTENamespace && isPassthroughToCalculatedInCTE()) {
383            return null;
384        }
385
386        // For CTE explicit column + star pattern: c1/c2/c3 trace to the star, NOT through the star
387        // Example: WITH cte(c1, c2, c3) AS (SELECT * FROM Employees)
388        // Without metadata, c1 traces to Employees.* (the star), not Employees.c1 (which doesn't exist)
389        // The evidence "cte_explicit_column_via_star" indicates this pattern
390        if ("cte_explicit_column_via_star".equals(evidence)) {
391            return null;
392        }
393
394        // If an override table is set (e.g., for traced columns), use it
395        if (overrideTable != null) {
396            return overrideTable;
397        }
398
399        if (sourceNamespace == null) {
400            return null;
401        }
402
403        // For UnionNamespace: UNION columns don't belong to any specific physical table.
404        // They're a combination of multiple branches. UnionNamespace.getFinalTable() returns
405        // the first branch's table which is incorrect for tracking column origins.
406        if (sourceNamespace instanceof UnionNamespace) {
407            return null;
408        }
409
410        // For SubqueryNamespace without override table: if the subquery has multiple tables
411        // AND no qualified star to identify the source, we can't determine which table
412        // the column comes from. Returning the first table would be incorrect.
413        // Example: FROM CDS_H_PARTNER PAR, (SELECT kategorie ... FROM CDS_H_KUNDEN_OBJEKT) subq
414        // But if there's a qualified star like "ta.*", that identifies the source table.
415        //
416        // IMPORTANT: For Teradata, implicit lateral derived tables (auto-added tables when
417        // a column references an undeclared table in WHERE clause) should be excluded from
418        // the multiple-table count. These are syntactic sugar and shouldn't affect column
419        // resolution to the actual source table.
420        if (sourceNamespace instanceof SubqueryNamespace && overrideTable == null) {
421            SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace;
422            gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = subNs.getSubquery();
423            if (subquery != null && subquery.tables != null) {
424                // Count only real tables (excluding implicit lateral derived tables)
425                int realTableCount = countRealTables(subquery.tables);
426                if (realTableCount > 1) {
427                    // Check if there's a qualified star that can identify the source
428                    if (!hasQualifiedStar(subquery)) {
429                        return null;
430                    }
431                }
432            }
433        }
434
435        // For CTENamespace with multiple tables (e.g., JOIN): trace the specific column
436        // to its correct source table using the definitionNode
437        // Example: WITH cte AS (SELECT m.album_id, b.band_name FROM albums m JOIN bands b ...)
438        // When tracing 'band_name', we need to find it comes from 'b' (bands), not 'm' (albums)
439        if (sourceNamespace instanceof CTENamespace) {
440            CTENamespace cteNs = (CTENamespace) sourceNamespace;
441            TTable tracedTable = null;
442
443            if (definitionNode instanceof TResultColumn) {
444                // Direct case: definitionNode is a TResultColumn from the CTE's SELECT list
445                tracedTable = traceColumnThroughCTE(cteNs, (TResultColumn) definitionNode);
446            } else {
447                // Indirect case: The column might be traced through a star column
448                // Try to find the column by name in the CTE chain
449                tracedTable = traceColumnByNameThroughCTE(cteNs, exposedName);
450            }
451
452            if (tracedTable != null) {
453                return tracedTable;
454            }
455
456            // For CTEs with multiple tables, if tracing failed (unqualified column),
457            // return null instead of the first table to avoid incorrect lineage.
458            // Example: WITH cte AS (SELECT musicians.id, musician_name, music_bands.band_name
459            //          FROM musicians JOIN ... JOIN music_bands)
460            // The unqualified 'musician_name' cannot be traced to any specific table
461            // without metadata, so we should NOT guess and pick the first table.
462            TSelectSqlStatement cteSelect = cteNs.getSelectStatement();
463            if (cteSelect != null && cteSelect.tables != null) {
464                int tableCount = countRealTables(cteSelect.tables);
465                if (tableCount > 1) {
466                    // Cannot determine which table - don't guess
467                    return null;
468                }
469            }
470        }
471
472        return sourceNamespace.getFinalTable();
473    }
474
475    /**
476     * Trace a column by name through a CTE to find its correct source table.
477     * This handles the case when the column is traced through star columns
478     * and we don't have the direct TResultColumn definition.
479     *
480     * @param cteNs The CTENamespace to trace through
481     * @param columnName The name of the column to find
482     * @return The correct source table, or null if unable to determine
483     */
484    private TTable traceColumnByNameThroughCTE(CTENamespace cteNs, String columnName) {
485        if (columnName == null || columnName.isEmpty()) {
486            return null;
487        }
488
489        // Get the CTE's SELECT statement
490        TSelectSqlStatement cteSelect = cteNs.getSelectStatement();
491        if (cteSelect == null) {
492            return null;
493        }
494
495        // First, check if this CTE has explicit columns matching the name
496        TTable result = findColumnInSelectList(cteSelect, columnName);
497        if (result != null) {
498            return result;
499        }
500
501        // If not found directly, check if this CTE uses SELECT * from another CTE
502        if (cteSelect.tables != null) {
503            for (int i = 0; i < cteSelect.tables.size(); i++) {
504                TTable table = cteSelect.tables.getTable(i);
505                if (table == null) continue;
506
507                // If it references another CTE, trace through it
508                if (table.isCTEName() && table.getCTE() != null) {
509                    gudusoft.gsqlparser.nodes.TCTE underlyingCte = table.getCTE();
510                    TSelectSqlStatement underlyingSelect = underlyingCte.getSubquery();
511                    if (underlyingSelect != null) {
512                        result = findColumnInSelectList(underlyingSelect, columnName);
513                        if (result != null) {
514                            return result;
515                        }
516                    }
517                }
518            }
519        }
520
521        return null;
522    }
523
524    /**
525     * Trace a column through a CTE to find its correct source table.
526     * This handles CTEs with JOINs where columns come from different tables,
527     * including CTEs with star columns that reference other CTEs.
528     *
529     * @param cteNs The CTENamespace
530     * @param resultColumn The TResultColumn from the CTE's SELECT list
531     * @return The correct source table, or null if unable to determine
532     */
533    private TTable traceColumnThroughCTE(CTENamespace cteNs, TResultColumn resultColumn) {
534        if (resultColumn == null || resultColumn.getExpr() == null) {
535            return null;
536        }
537
538        TExpression expr = resultColumn.getExpr();
539
540        // Check if the expression is a star column (e.g., SELECT * FROM other_cte)
541        // In this case, we need to trace through to the underlying CTE
542        if (expr.getExpressionType() == EExpressionType.simple_object_name_t) {
543            TObjectName colRef = expr.getObjectOperand();
544            if (colRef != null && "*".equals(colRef.getColumnNameOnly())) {
545                // This is a star column - trace through to find the actual column
546                return traceColumnThroughStarInCTE(cteNs, exposedName);
547            }
548        }
549
550        // Check if the expression is a simple column reference
551        if (expr.getExpressionType() != EExpressionType.simple_object_name_t) {
552            return null;
553        }
554
555        TObjectName colRef = expr.getObjectOperand();
556        if (colRef == null) {
557            return null;
558        }
559
560        // Check if the column has a table qualifier (e.g., "b.band_name")
561        String tableQualifier = colRef.getTableString();
562        if (tableQualifier == null || tableQualifier.isEmpty()) {
563            // No qualifier - can't determine which table
564            return null;
565        }
566
567        // Get the CTE's subquery to find the table with matching alias
568        TSelectSqlStatement cteSubquery = cteNs.getSelectStatement();
569        if (cteSubquery == null || cteSubquery.tables == null) {
570            return null;
571        }
572
573        // Search for the table with matching alias or name
574        for (int i = 0; i < cteSubquery.tables.size(); i++) {
575            TTable table = cteSubquery.tables.getTable(i);
576            if (table == null) continue;
577
578            // Check alias match
579            String alias = table.getAliasName();
580            if (alias != null && alias.equalsIgnoreCase(tableQualifier)) {
581                // Found the table - now trace to its final physical table if needed
582                return traceToPhysicalTable(table);
583            }
584
585            // Check table name match (for unaliased tables)
586            String tableName = table.getTableName() != null ? table.getTableName().toString() : null;
587            if (tableName != null && tableName.equalsIgnoreCase(tableQualifier)) {
588                return traceToPhysicalTable(table);
589            }
590        }
591
592        return null;
593    }
594
595    /**
596     * Trace a specific column through a CTE that uses SELECT *.
597     * This finds the underlying CTE that defines the column and traces it to the correct table.
598     *
599     * @param cteNs The CTE namespace with SELECT *
600     * @param columnName The name of the column to trace
601     * @return The correct source table, or null if unable to determine
602     */
603    private TTable traceColumnThroughStarInCTE(CTENamespace cteNs, String columnName) {
604        if (columnName == null || columnName.isEmpty()) {
605            return null;
606        }
607
608        TSelectSqlStatement cteSubquery = cteNs.getSelectStatement();
609        if (cteSubquery == null || cteSubquery.tables == null) {
610            return null;
611        }
612
613        // Find the underlying CTE or table that the star column references
614        for (int i = 0; i < cteSubquery.tables.size(); i++) {
615            TTable table = cteSubquery.tables.getTable(i);
616            if (table == null) continue;
617
618            // If it's a CTE reference, look for the column in that CTE
619            if (table.isCTEName() && table.getCTE() != null) {
620                gudusoft.gsqlparser.nodes.TCTE underlyingCte = table.getCTE();
621                TSelectSqlStatement underlyingSubquery = underlyingCte.getSubquery();
622                if (underlyingSubquery != null) {
623                    // Look for the column in the underlying CTE's SELECT list
624                    TTable tracedTable = findColumnInSelectList(underlyingSubquery, columnName);
625                    if (tracedTable != null) {
626                        return tracedTable;
627                    }
628                }
629            }
630        }
631
632        return null;
633    }
634
635    /**
636     * Find a column by name in a SELECT list and trace it to its source table.
637     *
638     * @param selectStmt The SELECT statement to search
639     * @param columnName The column name to find
640     * @return The source table for the column, or null if not found
641     */
642    private TTable findColumnInSelectList(TSelectSqlStatement selectStmt, String columnName) {
643        if (selectStmt == null || selectStmt.getResultColumnList() == null) {
644            return null;
645        }
646
647        gudusoft.gsqlparser.nodes.TResultColumnList resultList = selectStmt.getResultColumnList();
648        for (int i = 0; i < resultList.size(); i++) {
649            TResultColumn rc = resultList.getResultColumn(i);
650            if (rc == null) continue;
651
652            // Get the exposed name (alias or column name)
653            String exposedColName = rc.getAliasClause() != null
654                ? rc.getAliasClause().toString()
655                : (rc.getExpr() != null && rc.getExpr().getObjectOperand() != null
656                    ? rc.getExpr().getObjectOperand().getColumnNameOnly()
657                    : null);
658
659            if (exposedColName != null && exposedColName.equalsIgnoreCase(columnName)) {
660                // Found the column - trace it to its source table
661                TExpression expr = rc.getExpr();
662                if (expr != null && expr.getExpressionType() == EExpressionType.simple_object_name_t) {
663                    TObjectName colRef = expr.getObjectOperand();
664                    if (colRef != null) {
665                        String tableQualifier = colRef.getTableString();
666                        if (tableQualifier != null && !tableQualifier.isEmpty()) {
667                            // Find the table with this qualifier in the FROM clause
668                            if (selectStmt.tables != null) {
669                                for (int j = 0; j < selectStmt.tables.size(); j++) {
670                                    TTable table = selectStmt.tables.getTable(j);
671                                    if (table == null) continue;
672
673                                    String alias = table.getAliasName();
674                                    if (alias != null && alias.equalsIgnoreCase(tableQualifier)) {
675                                        return traceToPhysicalTable(table);
676                                    }
677
678                                    String tableName = table.getTableName() != null
679                                        ? table.getTableName().toString() : null;
680                                    if (tableName != null && tableName.equalsIgnoreCase(tableQualifier)) {
681                                        return traceToPhysicalTable(table);
682                                    }
683                                }
684                            }
685                        }
686                    }
687                }
688            }
689        }
690
691        return null;
692    }
693
694    /**
695     * Trace a table to its underlying physical table.
696     * Handles CTEs, subqueries, and JOINs.
697     */
698    private TTable traceToPhysicalTable(TTable table) {
699        if (table == null) {
700            return null;
701        }
702
703        // If it's already a physical table, return it
704        if (table.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !table.isCTEName()) {
705            return table;
706        }
707
708        // If it's a CTE reference, trace through the CTE
709        if (table.isCTEName() && table.getCTE() != null) {
710            // Use a simple approach - get the first physical table from the CTE
711            // This could be enhanced to trace specific columns through nested CTEs
712            gudusoft.gsqlparser.nodes.TCTE nestedCte = table.getCTE();
713            if (nestedCte.getSubquery() != null && nestedCte.getSubquery().tables != null) {
714                for (int i = 0; i < nestedCte.getSubquery().tables.size(); i++) {
715                    TTable nestedTable = nestedCte.getSubquery().tables.getTable(i);
716                    TTable physical = traceToPhysicalTable(nestedTable);
717                    if (physical != null) {
718                        return physical;
719                    }
720                }
721            }
722        }
723
724        // If it's a subquery, trace through it
725        if (table.getSubquery() != null) {
726            SubqueryNamespace nestedNs = new SubqueryNamespace(
727                table.getSubquery(),
728                table.getAliasName(),
729                null  // nameMatcher not needed for simple tracing
730            );
731            return nestedNs.getFinalTable();
732        }
733
734        return null;
735    }
736
737    /**
738     * Get all physical tables that this column might originate from.
739     *
740     * <p>For columns from UNION queries, this returns tables from ALL branches,
741     * not just the first one. This is essential for proper lineage tracking
742     * where a column like {@code actor_id} in a UNION query should be linked
743     * to {@code actor.actor_id}, {@code actor2.actor_id}, {@code actor3.actor_id}.</p>
744     *
745     * <p>For regular single-table sources, this returns a single-element list
746     * with the same table as {@link #getFinalTable()}.</p>
747     *
748     * @return List of all physical tables, or empty list if unable to determine
749     */
750    public java.util.List<TTable> getAllFinalTables() {
751        // If this ColumnSource has explicit candidateTables set (e.g., from UNION inference),
752        // use those instead of delegating to namespace. This is critical for UNION queries
753        // where only branches with SELECT * should contribute candidate tables for inferred columns.
754        // An EMPTY list means "no matching tables" - return it as-is without delegating.
755        // A NULL means "not determined" - delegate to namespace.
756        if (candidateTables != null) {
757            return candidateTables;
758        }
759
760        if (sourceNamespace == null) {
761            if (overrideTable != null) {
762                return java.util.Collections.singletonList(overrideTable);
763            }
764            return java.util.Collections.emptyList();
765        }
766
767        // For calculated columns and aliases in SubqueryNamespace, don't trace
768        if (sourceNamespace instanceof SubqueryNamespace) {
769            if (isCalculatedColumn() || isColumnAlias()) {
770                return java.util.Collections.emptyList();
771            }
772        }
773
774        // For CTENamespace calculated/alias/explicit columns, trace to CTE itself
775        if (sourceNamespace instanceof CTENamespace) {
776            if (isCalculatedColumn() || isColumnAlias() || isCTEExplicitColumn()) {
777                TTable cteTable = ((CTENamespace) sourceNamespace).getReferencingTable();
778                if (cteTable != null) {
779                    return java.util.Collections.singletonList(cteTable);
780                }
781                return java.util.Collections.emptyList();
782            }
783        }
784
785        // Delegate to namespace - handles UNION queries via UnionNamespace.getAllFinalTables()
786        return sourceNamespace.getAllFinalTables();
787    }
788
789    /**
790     * Check if this column is a passthrough reference to an underlying alias.
791     *
792     * <p>A passthrough column is a simple column reference in a subquery that
793     * references another column from its FROM clause. If that underlying column
794     * is an alias, then this passthrough should not trace to the base table.</p>
795     *
796     * <p>Example: In {@code SELECT stat_typ FROM (SELECT stat_typ = col FROM t) AS b},
797     * the outer {@code stat_typ} is a passthrough to {@code b.stat_typ}, which is an alias.</p>
798     *
799     * @return true if this is a passthrough to an alias
800     */
801    private boolean isPassthroughToAlias() {
802        if (definitionNode == null || !(definitionNode instanceof TResultColumn)) {
803            return false;
804        }
805
806        TResultColumn rc = (TResultColumn) definitionNode;
807        TExpression expr = rc.getExpr();
808        if (expr == null) {
809            return false;
810        }
811
812        // Only check simple column references (passthroughs)
813        if (expr.getExpressionType() != EExpressionType.simple_object_name_t) {
814            return false;
815        }
816
817        // If this column itself has an alias that differs, it's already handled by isColumnAlias()
818        if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) {
819            return false;
820        }
821
822        // Get the column name being referenced
823        gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
824        if (objName == null) {
825            return false;
826        }
827        String columnName = objName.getColumnNameOnly();
828        if (columnName == null || columnName.isEmpty()) {
829            return false;
830        }
831
832        // Resolve this column in the subquery's FROM scope to find the underlying ColumnSource
833        if (sourceNamespace instanceof SubqueryNamespace) {
834            SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace;
835            ColumnSource underlyingSource = subNs.resolveColumnInFromScope(columnName);
836            if (underlyingSource != null) {
837                // Check if the underlying column is an alias or calculated
838                if (underlyingSource.isColumnAlias() || underlyingSource.isCalculatedColumn()) {
839                    return true;
840                }
841                // Recursively check if it's a passthrough to alias
842                if (underlyingSource.isPassthroughToAlias()) {
843                    return true;
844                }
845            }
846        }
847
848        return false;
849    }
850
851    /**
852     * Check if this subquery column is a passthrough reference to a calculated column.
853     *
854     * <p>A subquery column is a passthrough to calculated if:</p>
855     * <ol>
856     *   <li>The column definition is a simple column reference (e.g., kko_lfz_9)</li>
857     *   <li>The referenced column in the FROM scope is calculated (CASE, function, etc.)</li>
858     * </ol>
859     *
860     * <p>Example:</p>
861     * <pre>
862     * SELECT kko_lfz_9 AS KKO_LFZ_9
863     * FROM (SELECT CASE WHEN... END AS kko_lfz_9 FROM t) subq
864     * </pre>
865     * <p>Here, kko_lfz_9 in the outer query is a passthrough to a calculated column in subq.</p>
866     *
867     * <p>This differs from {@link #isPassthroughToAlias()} which skips columns with aliases.
868     * Here we check even aliased passthroughs to see if they reference calculated columns.</p>
869     *
870     * @return true if this is a passthrough to a calculated column in a subquery
871     */
872    private boolean isPassthroughToCalculatedInSubquery() {
873        if (definitionNode == null || !(definitionNode instanceof TResultColumn)) {
874            return false;
875        }
876
877        TResultColumn rc = (TResultColumn) definitionNode;
878        TExpression expr = rc.getExpr();
879        if (expr == null) {
880            return false;
881        }
882
883        // Only check simple column references (passthroughs)
884        if (expr.getExpressionType() != EExpressionType.simple_object_name_t) {
885            return false;
886        }
887
888        // Get the column name being referenced
889        gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
890        if (objName == null) {
891            return false;
892        }
893        String columnName = objName.getColumnNameOnly();
894        if (columnName == null || columnName.isEmpty()) {
895            return false;
896        }
897
898        // Resolve this column in the subquery's FROM scope to find the underlying ColumnSource
899        if (sourceNamespace instanceof SubqueryNamespace) {
900            SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace;
901            ColumnSource underlyingSource = subNs.resolveColumnInFromScope(columnName);
902            if (underlyingSource != null) {
903                // Check if the underlying column is calculated
904                if (underlyingSource.isCalculatedColumn()) {
905                    return true;
906                }
907                // Recursively check if it's a passthrough to calculated
908                if (underlyingSource.isPassthroughToCalculatedInSubquery()) {
909                    return true;
910                }
911            }
912        }
913
914        return false;
915    }
916
917    /**
918     * Check if this CTE column is a passthrough reference to a calculated column in a subquery or nested CTE.
919     *
920     * <p>A CTE column is a passthrough to calculated if:</p>
921     * <ol>
922     *   <li>The column definition is a simple qualified column reference (e.g., subq.calc_col or cte.calc_col)</li>
923     *   <li>The qualifier refers to a subquery or CTE in the CTE's body</li>
924     *   <li>The referenced column in that subquery/CTE is calculated (CASE, function, etc.)</li>
925     * </ol>
926     *
927     * <p>Example with subquery:</p>
928     * <pre>
929     * WITH DataCTE AS (
930     *   SELECT ErrorCountsCTE.ErrorSeverityCategory  -- passthrough
931     *   FROM (SELECT CASE...END AS ErrorSeverityCategory FROM t) ErrorCountsCTE
932     * )
933     * </pre>
934     *
935     * <p>Example with nested CTE:</p>
936     * <pre>
937     * WITH attendance_summary AS (
938     *   SELECT date_trunc('month', attendance_date) as month FROM attendance
939     * )
940     * WITH outer_cte AS (
941     *   SELECT a.month FROM attendance_summary a  -- passthrough to calculated in nested CTE
942     * )
943     * </pre>
944     *
945     * @return true if this is a passthrough to a calculated column in a CTE
946     */
947    private boolean isPassthroughToCalculatedInCTE() {
948        if (definitionNode == null || !(definitionNode instanceof TResultColumn)) {
949            return false;
950        }
951
952        TResultColumn rc = (TResultColumn) definitionNode;
953        TExpression expr = rc.getExpr();
954        if (expr == null) {
955            return false;
956        }
957
958        // Only check simple qualified column references (passthroughs like subq.column)
959        if (expr.getExpressionType() != EExpressionType.simple_object_name_t) {
960            return false;
961        }
962
963        // Get the column reference
964        gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
965        if (objName == null) {
966            return false;
967        }
968
969        // Must have a table qualifier (e.g., "ErrorCountsCTE" in "ErrorCountsCTE.ErrorSeverityCategory")
970        String tableQualifier = objName.getTableString();
971        if (tableQualifier == null || tableQualifier.isEmpty()) {
972            return false;
973        }
974
975        String columnName = objName.getColumnNameOnly();
976        if (columnName == null || columnName.isEmpty()) {
977            return false;
978        }
979
980        // Get the CTE's subquery to find the referenced subquery alias
981        if (!(sourceNamespace instanceof CTENamespace)) {
982            return false;
983        }
984
985        CTENamespace cteNs = (CTENamespace) sourceNamespace;
986        gudusoft.gsqlparser.nodes.TCTE cte = cteNs.getCTE();
987        if (cte == null || cte.getSubquery() == null) {
988            return false;
989        }
990
991        // Find the subquery/table with this alias in the CTE's body
992        gudusoft.gsqlparser.stmt.TSelectSqlStatement cteBody = cte.getSubquery();
993        TTable referencedTable = findTableByAlias(cteBody, tableQualifier);
994        if (referencedTable == null) {
995            return false;
996        }
997
998        // Case 1: Referenced table is a subquery
999        if (referencedTable.getSubquery() != null) {
1000            gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = referencedTable.getSubquery();
1001            return isCalculatedColumnInSelect(subquery, columnName);
1002        }
1003
1004        // Case 2: Referenced table is a CTE reference
1005        if (referencedTable.isCTEName() && referencedTable.getCTE() != null) {
1006            gudusoft.gsqlparser.nodes.TCTE referencedCTE = referencedTable.getCTE();
1007            if (referencedCTE.getSubquery() != null) {
1008                return isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName);
1009            }
1010        }
1011
1012        return false;
1013    }
1014
1015    /**
1016     * Find a table in a SELECT statement by its alias.
1017     */
1018    private TTable findTableByAlias(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String alias) {
1019        if (select == null || select.tables == null || alias == null) {
1020            return null;
1021        }
1022
1023        for (int i = 0; i < select.tables.size(); i++) {
1024            TTable table = select.tables.getTable(i);
1025            if (table != null) {
1026                String tableAlias = table.getAliasName();
1027                if (tableAlias != null && tableAlias.equalsIgnoreCase(alias)) {
1028                    return table;
1029                }
1030                // Also check table name for non-aliased references
1031                if (tableAlias == null && table.getTableName() != null) {
1032                    String tableName = table.getTableName().toString();
1033                    if (tableName != null && tableName.equalsIgnoreCase(alias)) {
1034                        return table;
1035                    }
1036                }
1037            }
1038        }
1039        return null;
1040    }
1041
1042    /**
1043     * Check if a column in a SELECT statement is calculated (not a simple column reference).
1044     */
1045    private boolean isCalculatedColumnInSelect(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) {
1046        if (select == null || select.getResultColumnList() == null || columnName == null) {
1047            return false;
1048        }
1049
1050        for (int i = 0; i < select.getResultColumnList().size(); i++) {
1051            TResultColumn rc = select.getResultColumnList().getResultColumn(i);
1052            if (rc == null) continue;
1053
1054            // Get the column name for this result column
1055            String rcName = null;
1056            if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) {
1057                rcName = rc.getAliasClause().getAliasName().toString();
1058            } else if (rc.getExpr() != null &&
1059                       rc.getExpr().getExpressionType() == EExpressionType.simple_object_name_t &&
1060                       rc.getExpr().getObjectOperand() != null) {
1061                rcName = rc.getExpr().getObjectOperand().getColumnNameOnly();
1062            }
1063
1064            if (rcName != null && rcName.equalsIgnoreCase(columnName)) {
1065                // Found the column - check if it's calculated
1066                TExpression expr = rc.getExpr();
1067                if (expr != null && expr.getExpressionType() != EExpressionType.simple_object_name_t) {
1068                    // Non-simple expression = calculated
1069                    return true;
1070                }
1071            }
1072        }
1073        return false;
1074    }
1075
1076    /**
1077     * Check if this column source represents a calculated expression.
1078     *
1079     * <p>A column is calculated if its definition is a TResultColumn with
1080     * a non-simple expression (not a direct column reference or star).</p>
1081     *
1082     * <p>For inferred columns (via star expansion), we trace back to the
1083     * source CTE/subquery to check if the original column is calculated.</p>
1084     *
1085     * @return true if this is a calculated column
1086     */
1087    public boolean isCalculatedColumn() {
1088        if (definitionNode == null) {
1089            // For inferred columns through star expansion, check if the underlying
1090            // column in the source CTE/subquery is calculated
1091            return isInferredFromCalculatedColumn();
1092        }
1093
1094        if (!(definitionNode instanceof TResultColumn)) {
1095            return false;
1096        }
1097
1098        TResultColumn rc = (TResultColumn) definitionNode;
1099        TExpression expr = rc.getExpr();
1100        if (expr == null) {
1101            return false;
1102        }
1103
1104        EExpressionType exprType = expr.getExpressionType();
1105
1106        // Simple column reference - NOT calculated (passthrough)
1107        if (exprType == EExpressionType.simple_object_name_t) {
1108            return false;
1109        }
1110
1111        // Star column - NOT calculated (passthrough)
1112        String colText = rc.toString();
1113        if (colText != null && colText.endsWith("*")) {
1114            return false;
1115        }
1116
1117        // SQL Server proprietary column alias (col = expr)
1118        if (exprType == EExpressionType.sqlserver_proprietary_column_alias_t) {
1119            if (expr.getRightOperand() != null &&
1120                expr.getRightOperand().getExpressionType() == EExpressionType.simple_object_name_t) {
1121                return false;
1122            }
1123        }
1124
1125        // Any other expression type is calculated
1126        return true;
1127    }
1128
1129    /**
1130     * Check if this is an inferred column (via star expansion) that originates from
1131     * a calculated column in the source CTE/subquery.
1132     *
1133     * <p>When a column is resolved through star expansion (e.g., SELECT * FROM CTE),
1134     * the definitionNode is null. We need to trace back to the source namespace
1135     * to check if the original column is calculated.</p>
1136     *
1137     * @return true if this inferred column traces back to a calculated column
1138     */
1139    private boolean isInferredFromCalculatedColumn() {
1140        // Only check for inferred columns (evidence contains "auto_inferred")
1141        if (evidence == null || !evidence.contains("auto_inferred")) {
1142            return false;
1143        }
1144
1145        // Need the source namespace and column name to trace
1146        if (sourceNamespace == null || exposedName == null) {
1147            return false;
1148        }
1149
1150        // For CTE namespace, check if the column is calculated in the CTE's SELECT list
1151        if (sourceNamespace instanceof CTENamespace) {
1152            CTENamespace cteNs = (CTENamespace) sourceNamespace;
1153            gudusoft.gsqlparser.nodes.TCTE cte = cteNs.getCTE();
1154            if (cte != null && cte.getSubquery() != null) {
1155                // First check the CTE's direct SELECT list
1156                if (isCalculatedColumnInSelect(cte.getSubquery(), exposedName)) {
1157                    return true;
1158                }
1159
1160                // If the CTE has a star column, trace through to referenced CTEs
1161                if (cteNs.hasStarColumn()) {
1162                    return isCalculatedInCTEChain(cte.getSubquery(), exposedName);
1163                }
1164            }
1165        }
1166
1167        // For Subquery namespace, check if the column is calculated in the subquery's SELECT list
1168        if (sourceNamespace instanceof SubqueryNamespace) {
1169            SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace;
1170            gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = subNs.getSubquery();
1171            if (subquery != null) {
1172                // First check the subquery's direct SELECT list
1173                if (isCalculatedColumnInSelect(subquery, exposedName)) {
1174                    return true;
1175                }
1176
1177                // If the subquery has a star column, trace through to source tables
1178                if (subNs.hasStarColumn()) {
1179                    return isCalculatedInSubqueryChain(subquery, exposedName);
1180                }
1181            }
1182        }
1183
1184        return false;
1185    }
1186
1187    /**
1188     * Check if a column is calculated by tracing through CTE references.
1189     * This handles cases like Stage4 -> Stage3 -> Stage2 where the column
1190     * is calculated at some intermediate level.
1191     */
1192    private boolean isCalculatedInCTEChain(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) {
1193        if (select == null || select.tables == null) {
1194            return false;
1195        }
1196
1197        // Look for CTE references in the FROM clause
1198        for (int i = 0; i < select.tables.size(); i++) {
1199            TTable table = select.tables.getTable(i);
1200            if (table != null && table.isCTEName() && table.getCTE() != null) {
1201                gudusoft.gsqlparser.nodes.TCTE referencedCTE = table.getCTE();
1202                if (referencedCTE.getSubquery() != null) {
1203                    // Check if the column is calculated in this CTE
1204                    if (isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName)) {
1205                        return true;
1206                    }
1207                    // Recursively check the CTE chain
1208                    if (isCalculatedInCTEChain(referencedCTE.getSubquery(), columnName)) {
1209                        return true;
1210                    }
1211                }
1212            }
1213        }
1214        return false;
1215    }
1216
1217    /**
1218     * Check if a column is calculated by tracing through subquery references.
1219     */
1220    private boolean isCalculatedInSubqueryChain(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) {
1221        if (select == null || select.tables == null) {
1222            return false;
1223        }
1224
1225        // Look for subquery tables in the FROM clause
1226        for (int i = 0; i < select.tables.size(); i++) {
1227            TTable table = select.tables.getTable(i);
1228            if (table != null && table.getSubquery() != null) {
1229                gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = table.getSubquery();
1230                // Check if the column is calculated in this subquery
1231                if (isCalculatedColumnInSelect(subquery, columnName)) {
1232                    return true;
1233                }
1234                // Recursively check the subquery chain
1235                if (isCalculatedInSubqueryChain(subquery, columnName)) {
1236                    return true;
1237                }
1238            }
1239            // Also check CTE references within subqueries
1240            if (table != null && table.isCTEName() && table.getCTE() != null) {
1241                gudusoft.gsqlparser.nodes.TCTE referencedCTE = table.getCTE();
1242                if (referencedCTE.getSubquery() != null) {
1243                    if (isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName)) {
1244                        return true;
1245                    }
1246                    if (isCalculatedInCTEChain(referencedCTE.getSubquery(), columnName)) {
1247                        return true;
1248                    }
1249                }
1250            }
1251        }
1252        return false;
1253    }
1254
1255    /**
1256     * Check if this column source represents a column alias (renamed column).
1257     *
1258     * <p>A column is an alias if it's a simple column reference in a subquery
1259     * that has been given a different name via AS or NAMED. For example:</p>
1260     * <ul>
1261     *   <li>{@code SELECT col AS alias FROM table} - alias is different from col</li>
1262     *   <li>{@code SELECT col (NAMED alias) FROM table} - Teradata NAMED syntax</li>
1263     *   <li>{@code SELECT alias = col FROM table} - SQL Server proprietary syntax</li>
1264     * </ul>
1265     *
1266     * <p>Column aliases should NOT trace to base tables because the alias name
1267     * doesn't exist as an actual column in the base table.</p>
1268     *
1269     * @return true if this is a column alias with a different name than the original
1270     */
1271    public boolean isColumnAlias() {
1272        if (definitionNode == null) {
1273            return false;
1274        }
1275
1276        if (!(definitionNode instanceof TResultColumn)) {
1277            return false;
1278        }
1279
1280        TResultColumn rc = (TResultColumn) definitionNode;
1281        TExpression expr = rc.getExpr();
1282        if (expr == null) {
1283            return false;
1284        }
1285
1286        EExpressionType exprType = expr.getExpressionType();
1287
1288        // Handle SQL Server proprietary alias syntax: alias = column
1289        // Example: stat_typ = stellplatz_typ
1290        if (exprType == EExpressionType.sqlserver_proprietary_column_alias_t) {
1291            TExpression rightExpr = expr.getRightOperand();
1292            TExpression leftExpr = expr.getLeftOperand();
1293            // Only if right side is a simple column reference
1294            if (rightExpr != null && leftExpr != null &&
1295                rightExpr.getExpressionType() == EExpressionType.simple_object_name_t) {
1296                gudusoft.gsqlparser.nodes.TObjectName rightObjName = rightExpr.getObjectOperand();
1297                gudusoft.gsqlparser.nodes.TObjectName leftObjName = leftExpr.getObjectOperand();
1298                if (rightObjName != null && leftObjName != null) {
1299                    String origName = rightObjName.getColumnNameOnly();
1300                    String aliasName = leftObjName.getColumnNameOnly();
1301                    // If alias name differs from original column name, it's an alias
1302                    if (origName != null && aliasName != null &&
1303                        !origName.equalsIgnoreCase(aliasName)) {
1304                        return true;
1305                    }
1306                }
1307            }
1308            return false;
1309        }
1310
1311        // Standard alias syntax: column AS alias
1312        // Only applies to simple column references
1313        if (exprType != EExpressionType.simple_object_name_t) {
1314            return false;
1315        }
1316
1317        // Check if there's an alias that differs from the column name
1318        if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) {
1319            String aliasName = rc.getAliasClause().getAliasName().toString();
1320            if (aliasName != null && !aliasName.isEmpty()) {
1321                gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
1322                if (objName != null) {
1323                    String origName = objName.getColumnNameOnly();
1324                    // If alias name differs from original name, it's an alias
1325                    if (origName != null && !origName.equalsIgnoreCase(aliasName)) {
1326                        return true;
1327                    }
1328                }
1329            }
1330        }
1331
1332        return false;
1333    }
1334
1335    /**
1336     * Check if this column is a CTE explicit column with a different name than the underlying column.
1337     *
1338     * <p>A CTE explicit column is one defined in the CTE's column list that maps to a
1339     * different column name in the CTE's SELECT list. For example:</p>
1340     * <pre>
1341     * WITH cte(c1, c2) AS (SELECT id, name FROM users)
1342     * SELECT c1 FROM cte  -- c1 maps to 'id', names differ
1343     * </pre>
1344     *
1345     * <p>CTE explicit columns should NOT trace to base tables because the explicit
1346     * column name (c1) doesn't exist as an actual column in the base table (users).</p>
1347     *
1348     * @return true if this is a CTE explicit column with a different name
1349     */
1350    public boolean isCTEExplicitColumn() {
1351        // Must be from a CTENamespace
1352        if (!(sourceNamespace instanceof CTENamespace)) {
1353            return false;
1354        }
1355
1356        // Check evidence for explicit column marker
1357        if (!"cte_explicit_column".equals(evidence)) {
1358            return false;
1359        }
1360
1361        // Get the underlying column name from the definition node
1362        if (definitionNode == null || !(definitionNode instanceof TResultColumn)) {
1363            return false;
1364        }
1365
1366        TResultColumn rc = (TResultColumn) definitionNode;
1367        TExpression expr = rc.getExpr();
1368        if (expr == null) {
1369            return false;
1370        }
1371
1372        // Get the column name from the SELECT list item
1373        String underlyingName = null;
1374
1375        // Check for alias first
1376        if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) {
1377            underlyingName = rc.getAliasClause().getAliasName().toString();
1378        }
1379        // Then check for simple column reference
1380        else if (expr.getExpressionType() == EExpressionType.simple_object_name_t &&
1381                 expr.getObjectOperand() != null) {
1382            underlyingName = expr.getObjectOperand().getColumnNameOnly();
1383        }
1384
1385        // If we can't determine the underlying name, assume it's different
1386        // (calculated expressions, etc. are definitely different from explicit column names)
1387        if (underlyingName == null) {
1388            return true;
1389        }
1390
1391        // If the exposed name differs from the underlying column name, it's an explicit column rename
1392        return !exposedName.equalsIgnoreCase(underlyingName);
1393    }
1394
1395    /**
1396     * Get the override table, if set.
1397     */
1398    public TTable getOverrideTable() {
1399        return overrideTable;
1400    }
1401
1402    /**
1403     * Get the candidate tables for ambiguous columns.
1404     *
1405     * <p>When a column could come from multiple tables (e.g., SELECT * FROM t1, t2),
1406     * this returns all possible source tables. End users can iterate through this
1407     * list to understand all potential sources for the column.</p>
1408     *
1409     * @return List of candidate tables, or empty list if not ambiguous
1410     */
1411    public List<TTable> getCandidateTables() {
1412        return candidateTables != null ? candidateTables : Collections.emptyList();
1413    }
1414
1415    /**
1416     * Check if this column has multiple candidate tables (is ambiguous).
1417     *
1418     * @return true if there are multiple candidate tables
1419     */
1420    public boolean isAmbiguous() {
1421        return candidateTables != null && candidateTables.size() > 1;
1422    }
1423
1424    /**
1425     * Get the field path for deep/record field access.
1426     *
1427     * <p>When a column reference includes field access beyond the base column,
1428     * this returns the field path. For example, in {@code customer.address.city},
1429     * if base column is {@code customer}, this returns a FieldPath with
1430     * segments {@code ["address", "city"]}.</p>
1431     *
1432     * @return The field path, or null if no field access
1433     */
1434    public FieldPath getFieldPath() {
1435        return fieldPath;
1436    }
1437
1438    /**
1439     * Check if this column source has a field path (deep/record field access).
1440     *
1441     * @return true if a non-empty field path exists
1442     */
1443    public boolean hasFieldPath() {
1444        return fieldPath != null && !fieldPath.isEmpty();
1445    }
1446
1447    /**
1448     * Check if this is a struct field access (has evidence "struct_field_access").
1449     *
1450     * <p>This is a convenience method for checking if this column source represents
1451     * a struct/record field dereference operation.</p>
1452     *
1453     * @return true if this is a struct field access
1454     */
1455    public boolean isStructFieldAccess() {
1456        return "struct_field_access".equals(evidence);
1457    }
1458
1459    /**
1460     * Checks if this is a definite resolution (confidence = 1.0)
1461     */
1462    public boolean isDefinite() {
1463        return confidence >= 1.0;
1464    }
1465
1466    /**
1467     * Checks if this is an inferred resolution (confidence < 1.0)
1468     */
1469    public boolean isInferred() {
1470        return confidence < 1.0;
1471    }
1472
1473    @Override
1474    public String toString() {
1475        StringBuilder sb = new StringBuilder();
1476        sb.append(exposedName);
1477        if (sourceNamespace != null) {
1478            sb.append(" from ").append(sourceNamespace.getDisplayName());
1479        }
1480        if (confidence < 1.0) {
1481            sb.append(String.format(" (confidence: %.2f)", confidence));
1482        }
1483        return sb.toString();
1484    }
1485
1486    /**
1487     * Creates a copy with updated confidence and evidence.
1488     * Used when merging or updating inference results.
1489     *
1490     * @deprecated Use {@link #withEvidence(ResolutionEvidence)} instead
1491     */
1492    public ColumnSource withConfidence(double newConfidence, String newEvidence) {
1493        return new ColumnSource(
1494            this.sourceNamespace,
1495            this.exposedName,
1496            this.definitionNode,
1497            newConfidence,
1498            newEvidence,
1499            this.overrideTable,
1500            this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null,
1501            null, // will create from legacy evidence
1502            this.fieldPath
1503        );
1504    }
1505
1506    /**
1507     * Creates a copy with updated ResolutionEvidence.
1508     * This is the preferred method for updating evidence in new code.
1509     *
1510     * @param newEvidence The new evidence detail
1511     * @return A new ColumnSource with updated evidence
1512     */
1513    public ColumnSource withEvidence(ResolutionEvidence newEvidence) {
1514        return new ColumnSource(
1515            this.sourceNamespace,
1516            this.exposedName,
1517            this.definitionNode,
1518            newEvidence != null ? newEvidence.getWeight() : this.confidence,
1519            newEvidence != null ? newEvidence.toLegacyEvidence() : this.evidence,
1520            this.overrideTable,
1521            this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null,
1522            newEvidence,
1523            this.fieldPath
1524        );
1525    }
1526
1527    /**
1528     * Creates a copy with candidate tables.
1529     * Used when a column could come from multiple tables.
1530     */
1531    public ColumnSource withCandidateTables(List<TTable> candidates) {
1532        return new ColumnSource(
1533            this.sourceNamespace,
1534            this.exposedName,
1535            this.definitionNode,
1536            this.confidence,
1537            this.evidence,
1538            this.overrideTable,
1539            candidates != null ? new java.util.ArrayList<>(candidates) : null,
1540            this.evidenceDetail,
1541            this.fieldPath
1542        );
1543    }
1544
1545    /**
1546     * Creates a copy with a field path for deep/record field access.
1547     *
1548     * <p>This method is used when resolving struct/record field access patterns
1549     * like {@code customer.address.city}. The base column is preserved as the
1550     * exposedName, and the field path captures the remaining segments.</p>
1551     *
1552     * @param newFieldPath The field path segments (beyond the base column)
1553     * @return A new ColumnSource with the field path set
1554     */
1555    public ColumnSource withFieldPath(FieldPath newFieldPath) {
1556        return new ColumnSource(
1557            this.sourceNamespace,
1558            this.exposedName,
1559            this.definitionNode,
1560            this.confidence,
1561            this.evidence,
1562            this.overrideTable,
1563            this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null,
1564            this.evidenceDetail,
1565            newFieldPath
1566        );
1567    }
1568
1569    /**
1570     * Creates a copy with a field path from a list of segments.
1571     *
1572     * <p>Convenience method for creating a ColumnSource with a field path
1573     * from a list of string segments.</p>
1574     *
1575     * @param segments The field path segments
1576     * @return A new ColumnSource with the field path set
1577     */
1578    public ColumnSource withFieldPath(List<String> segments) {
1579        return withFieldPath(FieldPath.of(segments));
1580    }
1581
1582    /**
1583     * Creates a copy with field path and updated evidence.
1584     *
1585     * <p>This method is used when resolving struct field access, combining
1586     * both the field path and the struct_field_access evidence marker.</p>
1587     *
1588     * @param newFieldPath The field path segments
1589     * @param newEvidence The evidence string (e.g., "struct_field_access")
1590     * @return A new ColumnSource with field path and evidence updated
1591     */
1592    public ColumnSource withFieldPath(FieldPath newFieldPath, String newEvidence) {
1593        return new ColumnSource(
1594            this.sourceNamespace,
1595            this.exposedName,
1596            this.definitionNode,
1597            this.confidence,
1598            newEvidence,
1599            this.overrideTable,
1600            this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null,
1601            null, // will create from legacy evidence
1602            newFieldPath
1603        );
1604    }
1605
1606    /**
1607     * Count the number of "real" tables in a table list, excluding implicit lateral derived tables.
1608     *
1609     * <p>Teradata supports implicit lateral derived tables, which are auto-added when a column
1610     * references an undeclared table in the WHERE clause. These should not be counted when
1611     * determining if a subquery has multiple tables for column resolution purposes.</p>
1612     *
1613     * @param tables The table list to count
1614     * @return The number of real (non-implicit) tables
1615     */
1616    private static int countRealTables(gudusoft.gsqlparser.nodes.TTableList tables) {
1617        if (tables == null) {
1618            return 0;
1619        }
1620        int count = 0;
1621        for (int i = 0; i < tables.size(); i++) {
1622            TTable table = tables.getTable(i);
1623            if (table != null && table.getEffectType() != gudusoft.gsqlparser.ETableEffectType.tetImplicitLateralDerivedTable) {
1624                count++;
1625            }
1626        }
1627        return count;
1628    }
1629
1630    /**
1631     * Check if a SELECT statement has a qualified star column (e.g., ta.*, tb.*).
1632     * Qualified stars identify which table columns come from in multi-table subqueries.
1633     */
1634    private static boolean hasQualifiedStar(gudusoft.gsqlparser.stmt.TSelectSqlStatement select) {
1635        if (select == null || select.getResultColumnList() == null) {
1636            return false;
1637        }
1638        gudusoft.gsqlparser.nodes.TResultColumnList resultCols = select.getResultColumnList();
1639        for (int i = 0; i < resultCols.size(); i++) {
1640            TResultColumn rc = resultCols.getResultColumn(i);
1641            if (rc != null) {
1642                String colStr = rc.toString().trim();
1643                // Qualified star has format "alias.*" or "table.*"
1644                if (colStr.endsWith("*") && colStr.contains(".")) {
1645                    return true;
1646                }
1647            }
1648        }
1649        return false;
1650    }
1651
1652    /**
1653     * Check if a column exists in a table's DDL definition.
1654     *
1655     * <p>This method checks the table's column definitions (from CREATE TABLE statements
1656     * parsed in the same script) to verify if the column name is defined.</p>
1657     *
1658     * @param table The table to check
1659     * @param columnName The column name to look for
1660     * @return true if the column exists in the table's DDL, false if not found or no DDL available
1661     */
1662    public static boolean isColumnInTableDdl(TTable table, String columnName) {
1663        if (table == null || columnName == null || columnName.isEmpty()) {
1664            return false;
1665        }
1666
1667        // Check if the table has column definitions (from CREATE TABLE DDL)
1668        gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions();
1669        if (columnDefs != null && columnDefs.size() > 0) {
1670            for (int i = 0; i < columnDefs.size(); i++) {
1671                gudusoft.gsqlparser.nodes.TColumnDefinition colDef = columnDefs.getColumn(i);
1672                if (colDef != null && colDef.getColumnName() != null) {
1673                    String defColName = colDef.getColumnName().toString();
1674                    if (defColName != null && defColName.equalsIgnoreCase(columnName)) {
1675                        return true;
1676                    }
1677                }
1678            }
1679            // DDL exists but column not found
1680            return false;
1681        }
1682
1683        // No DDL available - return false (cannot verify)
1684        return false;
1685    }
1686
1687    /**
1688     * Check if a table has DDL metadata available (from CREATE TABLE in same script).
1689     *
1690     * @param table The table to check
1691     * @return true if DDL metadata is available for this table
1692     */
1693    public static boolean hasTableDdl(TTable table) {
1694        if (table == null) {
1695            return false;
1696        }
1697        gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions();
1698        return columnDefs != null && columnDefs.size() > 0;
1699    }
1700
1701    /**
1702     * Check DDL verification status for a candidate table.
1703     *
1704     * <p>Returns a tri-state result:</p>
1705     * <ul>
1706     *   <li>1 = Column exists in table's DDL</li>
1707     *   <li>0 = Column NOT found in table's DDL (DDL available but column missing)</li>
1708     *   <li>-1 = Cannot verify (no DDL available for this table)</li>
1709     * </ul>
1710     *
1711     * @param table The candidate table to check
1712     * @param columnName The column name to verify
1713     * @return DDL verification status: 1 (exists), 0 (not found), -1 (no DDL)
1714     */
1715    public static int getDdlVerificationStatus(TTable table, String columnName) {
1716        if (table == null || columnName == null) {
1717            return -1;
1718        }
1719
1720        gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions();
1721        if (columnDefs == null || columnDefs.size() == 0) {
1722            return -1; // No DDL available
1723        }
1724
1725        // DDL available - check if column exists
1726        for (int i = 0; i < columnDefs.size(); i++) {
1727            gudusoft.gsqlparser.nodes.TColumnDefinition colDef = columnDefs.getColumn(i);
1728            if (colDef != null && colDef.getColumnName() != null) {
1729                String defColName = colDef.getColumnName().toString();
1730                if (defColName != null && defColName.equalsIgnoreCase(columnName)) {
1731                    return 1; // Column exists in DDL
1732                }
1733            }
1734        }
1735
1736        return 0; // DDL exists but column not found
1737    }
1738
1739    /**
1740     * Get DDL verification status for all candidate tables.
1741     *
1742     * <p>Returns a map from each candidate table to its DDL verification status:</p>
1743     * <ul>
1744     *   <li>1 = Column exists in table's DDL</li>
1745     *   <li>0 = Column NOT found in table's DDL</li>
1746     *   <li>-1 = Cannot verify (no DDL available)</li>
1747     * </ul>
1748     *
1749     * @return Map of candidate tables to their DDL verification status, or empty map if no candidates
1750     */
1751    public java.util.Map<TTable, Integer> getCandidateTableDdlStatus() {
1752        java.util.Map<TTable, Integer> result = new java.util.LinkedHashMap<>();
1753        if (candidateTables == null || candidateTables.isEmpty() || exposedName == null) {
1754            return result;
1755        }
1756
1757        for (TTable candidate : candidateTables) {
1758            int status = getDdlVerificationStatus(candidate, exposedName);
1759            result.put(candidate, status);
1760        }
1761        return result;
1762    }
1763}