001package gudusoft.gsqlparser.resolver2.model;
002
003import gudusoft.gsqlparser.EExpressionType;
004import gudusoft.gsqlparser.nodes.TExpression;
005import gudusoft.gsqlparser.nodes.TObjectName;
006import gudusoft.gsqlparser.nodes.TParseTreeNode;
007import gudusoft.gsqlparser.nodes.TResultColumn;
008import gudusoft.gsqlparser.nodes.TTable;
009import gudusoft.gsqlparser.resolver2.inference.EvidenceType;
010import gudusoft.gsqlparser.resolver2.namespace.INamespace;
011import gudusoft.gsqlparser.resolver2.namespace.SubqueryNamespace;
012import gudusoft.gsqlparser.resolver2.namespace.CTENamespace;
013import gudusoft.gsqlparser.resolver2.namespace.UnionNamespace;
014import gudusoft.gsqlparser.stmt.TSelectSqlStatement;
015
016import java.util.Collections;
017import java.util.List;
018
019/**
020 * Represents the source of a column reference.
021 * Tracks where a column comes from, including intermediate transformations
022 * through subqueries and CTEs.
023 *
024 * Design principles:
025 * 1. Immutable - once created, cannot be modified
026 * 2. Recursive - can trace back through subquery/CTE layers
027 * 3. Confidence-scored - supports evidence-based inference
028 */
029public class ColumnSource {
030    /** The namespace where this column is exposed (e.g., subquery, table) */
031    private final INamespace sourceNamespace;
032
033    /** The name by which this column is exposed in the namespace */
034    private final String exposedName;
035
036    /** The AST node where this column is defined (TResultColumn, TTableColumn, etc.) */
037    private final TParseTreeNode definitionNode;
038
039    /** Location information for the definition */
040    private final SourceLocation definitionLocation;
041
042    /**
043     * Confidence score [0.0, 1.0]:
044     * - 1.0: Definite (from metadata or explicit definition)
045     * - 0.7-0.9: High confidence inference (strong evidence)
046     * - 0.5-0.7: Medium confidence inference (some evidence)
047     * - 0.0-0.5: Low confidence guess
048     */
049    private final double confidence;
050
051    /**
052     * Evidence that supports this resolution.
053     * Used for debugging and explaining inference decisions.
054     *
055     * @deprecated Use {@link #evidenceDetail} instead. This field is kept for backward
056     *             compatibility and will be derived from evidenceDetail if not explicitly set.
057     */
058    private final String evidence;
059
060    /**
061     * Structured evidence detail for this resolution.
062     * Provides type-safe evidence with confidence weight and source traceability.
063     * This is the preferred way to access resolution evidence.
064     *
065     * @see ResolutionEvidence
066     */
067    private final ResolutionEvidence evidenceDetail;
068
069    /**
070     * Override table for traced columns.
071     * When set, getFinalTable() returns this instead of namespace's table.
072     */
073    private final TTable overrideTable;
074
075    /**
076     * Candidate tables for ambiguous columns.
077     * When a column could come from multiple tables (e.g., SELECT * FROM t1, t2),
078     * this list contains all possible source tables so end users can access them.
079     */
080    private final List<TTable> candidateTables;
081
082    /**
083     * Field path for deep/record field access (e.g., struct.field.subfield).
084     *
085     * <p>When a column reference includes field access beyond the base column,
086     * this captures the field path. For example, in {@code customer.address.city},
087     * if base column is {@code customer}, fieldPath contains {@code ["address", "city"]}.</p>
088     *
089     * <p>This field is null or empty for regular column references without field access.</p>
090     *
091     * @see FieldPath
092     */
093    private final FieldPath fieldPath;
094
095    public ColumnSource(INamespace sourceNamespace,
096                       String exposedName,
097                       TParseTreeNode definitionNode,
098                       double confidence,
099                       String evidence) {
100        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, null, null);
101    }
102
103    public ColumnSource(INamespace sourceNamespace,
104                       String exposedName,
105                       TParseTreeNode definitionNode,
106                       double confidence,
107                       String evidence,
108                       TTable overrideTable) {
109        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, null);
110    }
111
112    public ColumnSource(INamespace sourceNamespace,
113                       String exposedName,
114                       TParseTreeNode definitionNode,
115                       double confidence,
116                       String evidence,
117                       TTable overrideTable,
118                       List<TTable> candidateTables) {
119        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, candidateTables, null, null);
120    }
121
122    /**
123     * Full constructor with all fields including ResolutionEvidence.
124     */
125    public ColumnSource(INamespace sourceNamespace,
126                       String exposedName,
127                       TParseTreeNode definitionNode,
128                       double confidence,
129                       String evidence,
130                       TTable overrideTable,
131                       List<TTable> candidateTables,
132                       ResolutionEvidence evidenceDetail) {
133        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, candidateTables, evidenceDetail, null);
134    }
135
136    /**
137     * Full constructor with all fields including ResolutionEvidence and FieldPath.
138     *
139     * @param sourceNamespace The namespace where this column is exposed
140     * @param exposedName The name by which this column is exposed
141     * @param definitionNode The AST node where this column is defined
142     * @param confidence Confidence score [0.0, 1.0]
143     * @param evidence Evidence string for this resolution
144     * @param overrideTable Override table for traced columns
145     * @param candidateTables Candidate tables for ambiguous columns
146     * @param evidenceDetail Structured evidence detail
147     * @param fieldPath Field path for deep/record field access
148     */
149    public ColumnSource(INamespace sourceNamespace,
150                       String exposedName,
151                       TParseTreeNode definitionNode,
152                       double confidence,
153                       String evidence,
154                       TTable overrideTable,
155                       List<TTable> candidateTables,
156                       ResolutionEvidence evidenceDetail,
157                       FieldPath fieldPath) {
158        this.sourceNamespace = sourceNamespace;
159        this.exposedName = exposedName;
160        this.definitionNode = definitionNode;
161        this.definitionLocation = definitionNode != null
162            ? new SourceLocation(definitionNode)
163            : null;
164        this.confidence = Math.max(0.0, Math.min(1.0, confidence));
165        this.evidence = evidence;
166        this.overrideTable = overrideTable;
167        this.candidateTables = candidateTables != null ? Collections.unmodifiableList(candidateTables) : null;
168        this.fieldPath = fieldPath;
169        // If evidenceDetail not provided, create from legacy evidence
170        if (evidenceDetail != null) {
171            this.evidenceDetail = evidenceDetail;
172        } else if (evidence != null) {
173            this.evidenceDetail = ResolutionEvidence.fromLegacyEvidence(evidence, confidence, definitionNode);
174        } else {
175            this.evidenceDetail = null;
176        }
177    }
178
179    /**
180     * Constructor with ResolutionEvidence (preferred for new code).
181     */
182    public ColumnSource(INamespace sourceNamespace,
183                       String exposedName,
184                       TParseTreeNode definitionNode,
185                       ResolutionEvidence evidenceDetail) {
186        this(sourceNamespace, exposedName, definitionNode,
187             evidenceDetail != null ? evidenceDetail.getWeight() : 1.0,
188             evidenceDetail != null ? evidenceDetail.toLegacyEvidence() : "metadata",
189             null, null, evidenceDetail);
190    }
191
192    /**
193     * Constructor with ResolutionEvidence and override table.
194     */
195    public ColumnSource(INamespace sourceNamespace,
196                       String exposedName,
197                       TParseTreeNode definitionNode,
198                       ResolutionEvidence evidenceDetail,
199                       TTable overrideTable) {
200        this(sourceNamespace, exposedName, definitionNode,
201             evidenceDetail != null ? evidenceDetail.getWeight() : 1.0,
202             evidenceDetail != null ? evidenceDetail.toLegacyEvidence() : "metadata",
203             overrideTable, null, evidenceDetail);
204    }
205
206    /**
207     * Constructor for definite matches (confidence = 1.0)
208     */
209    public ColumnSource(INamespace sourceNamespace,
210                       String exposedName,
211                       TParseTreeNode definitionNode) {
212        this(sourceNamespace, exposedName, definitionNode, 1.0, "metadata");
213    }
214
215    public INamespace getSourceNamespace() {
216        return sourceNamespace;
217    }
218
219    public String getExposedName() {
220        return exposedName;
221    }
222
223    public TParseTreeNode getDefinitionNode() {
224        return definitionNode;
225    }
226
227    public SourceLocation getDefinitionLocation() {
228        return definitionLocation;
229    }
230
231    public double getConfidence() {
232        return confidence;
233    }
234
235    public String getEvidence() {
236        return evidence;
237    }
238
239    /**
240     * Get the structured evidence detail for this resolution.
241     *
242     * <p>This is the preferred way to access resolution evidence as it provides:
243     * <ul>
244     *   <li>Type-safe evidence type (enum)</li>
245     *   <li>Confidence weight with clear semantics</li>
246     *   <li>Source location for traceability</li>
247     *   <li>Human-readable messages</li>
248     * </ul>
249     *
250     * @return The structured evidence detail, or null if not available
251     */
252    public ResolutionEvidence getEvidenceDetail() {
253        return evidenceDetail;
254    }
255
256    /**
257     * Get the evidence type from the structured evidence detail.
258     * Convenience method for common use cases.
259     *
260     * @return The evidence type, or null if no evidence detail
261     */
262    public EvidenceType getEvidenceType() {
263        return evidenceDetail != null ? evidenceDetail.getType() : null;
264    }
265
266    /**
267     * Check if this resolution has definite evidence (not inferred).
268     * Definite evidence comes from DDL, metadata, or explicit definitions.
269     *
270     * @return true if evidence is definite
271     */
272    public boolean hasDefiniteEvidence() {
273        if (evidenceDetail != null) {
274            return evidenceDetail.isDefinite();
275        }
276        // Fallback: check legacy evidence and confidence
277        if (confidence >= 1.0) {
278            return true;
279        }
280        if (evidence != null) {
281            String lower = evidence.toLowerCase();
282            return lower.contains("metadata") || lower.contains("ddl") ||
283                   lower.contains("explicit") || lower.contains("insert_column");
284        }
285        return false;
286    }
287
288    /**
289     * Get the <b>final</b> physical table this column originates from after tracing
290     * through all subqueries and CTEs.
291     *
292     * <h3>Semantic Difference: getFinalTable() vs TObjectName.getSourceTable()</h3>
293     * <ul>
294     *   <li><b>getFinalTable()</b> (this method): The final physical table after
295     *       recursively tracing through all subqueries and CTEs. Use this for data lineage.</li>
296     *   <li><b>TObjectName.getSourceTable()</b>: The immediate source in the current scope.
297     *       For a column from a subquery, this points to the subquery's TTable itself.</li>
298     * </ul>
299     *
300     * <h3>Example</h3>
301     * <pre>{@code
302     * SELECT title FROM (SELECT * FROM books) sub
303     *
304     * For the 'title' column in outer SELECT:
305     * - TObjectName.getSourceTable() → TTable for subquery 'sub' (immediate source)
306     * - ColumnSource.getFinalTable() → TTable for 'books' (final physical table)
307     * }</pre>
308     *
309     * <p>For calculated columns in subqueries (expressions like {@code START_DT - x AS alias}),
310     * this returns null because such calculated columns don't originate from a physical
311     * table - they are derived values computed in the subquery.</p>
312     *
313     * <p>For aliased columns in subqueries (e.g., {@code SELECT t.id AS col1 FROM my_table t}),
314     * this traces through the alias to find the physical table, because the data still
315     * originates from the physical table even though the column has been renamed.</p>
316     *
317     * <p>Note: For CTEs, calculated columns ARE the CTE's own columns, so they trace
318     * to the CTE itself (handled by CTENamespace.getFinalTable()).</p>
319     *
320     * @return The physical table, or null if unable to determine or if calculated in subquery
321     * @see gudusoft.gsqlparser.nodes.TObjectName#getSourceTable()
322     */
323    public TTable getFinalTable() {
324        if (sourceNamespace == null && overrideTable == null) {
325            return null;
326        }
327
328        // For SubqueryNamespace: calculated columns should NOT trace to base table
329        // They are derived values that don't exist in the underlying physical table
330        // Example: SELECT *, expr AS alias FROM table - alias is calculated, not from table
331        //
332        // IMPORTANT: Check BEFORE overrideTable to prevent alias/calculated columns
333        // from being traced to base tables even when overrideTable is explicitly set
334        if (sourceNamespace instanceof SubqueryNamespace && isCalculatedColumn()) {
335            return null;
336        }
337
338        // For CTENamespace: calculated columns ARE the CTE's own columns
339        // They should trace to the CTE itself (referencing table), NOT to underlying base tables
340        // Example: WITH cte AS (SELECT SUM(x) AS total FROM t) SELECT total FROM cte
341        // The 'total' column traces to 'cte', not to 't'
342        if (sourceNamespace instanceof CTENamespace && isCalculatedColumn()) {
343            return ((CTENamespace) sourceNamespace).getReferencingTable();
344        }
345
346        // For SubqueryNamespace: column aliases - trace through to find the source table.
347        // The alias changes the column name but the data still originates from a physical table.
348        // Example: SELECT t.id AS col1 FROM my_table t - col1's data comes from my_table
349        if (sourceNamespace instanceof SubqueryNamespace && isColumnAlias()) {
350            return traceColumnAliasThroughSubquery((SubqueryNamespace) sourceNamespace);
351        }
352
353        // For CTENamespace: column aliases ARE the CTE's own columns
354        // They should trace to the CTE itself, NOT to underlying base tables
355        // Example: WITH cte AS (SELECT x AS y FROM t) SELECT y FROM cte
356        // The 'y' column traces to 'cte', not to 't'
357        if (sourceNamespace instanceof CTENamespace && isColumnAlias()) {
358            return ((CTENamespace) sourceNamespace).getReferencingTable();
359        }
360
361        // For CTENamespace: explicit column names ARE the CTE's own columns
362        // They should trace to the CTE itself, NOT to underlying base tables
363        // Example: WITH cte(c1, c2) AS (SELECT id, name FROM t) SELECT c1 FROM cte
364        // The 'c1' column traces to 'cte', not to 't' (because 'c1' doesn't exist in 't')
365        if (sourceNamespace instanceof CTENamespace && isCTEExplicitColumn()) {
366            return ((CTENamespace) sourceNamespace).getReferencingTable();
367        }
368
369        // For SubqueryNamespace: passthrough columns that reference calculated columns should NOT trace
370        // IMPORTANT: This check must come BEFORE isPassthroughToAlias() because
371        // isPassthroughToAlias() returns true for BOTH alias and calculated passthroughs.
372        // Example: SELECT kko_lfz_9 FROM (SELECT CASE...END AS kko_lfz_9 FROM t) subq
373        // The outer kko_lfz_9 references a calculated column in the subquery
374        if (sourceNamespace instanceof SubqueryNamespace && isPassthroughToCalculatedInSubquery()) {
375            return null;
376        }
377
378        // For SubqueryNamespace: passthrough columns that reference aliases - trace through
379        // to find the source table. The data still originates from a physical table.
380        // Example: SELECT stat_typ FROM (SELECT stat_typ = stellplatz_typ FROM t) AS b
381        // The stat_typ in outer query references b.stat_typ → traces to t
382        if (sourceNamespace instanceof SubqueryNamespace && isPassthroughToAlias()) {
383            return traceColumnAliasThroughSubquery((SubqueryNamespace) sourceNamespace);
384        }
385
386        // For CTENamespace: passthrough columns that reference calculated subquery columns should NOT trace
387        // Example: WITH DataCTE AS (SELECT subq.calc_col FROM (SELECT CASE...END AS calc_col FROM t) subq)
388        // The CTE column calc_col references a calculated column in the subquery
389        if (sourceNamespace instanceof CTENamespace && isPassthroughToCalculatedInCTE()) {
390            return null;
391        }
392
393        // For CTE explicit column + star pattern: c1/c2/c3 trace to the star, NOT through the star
394        // Example: WITH cte(c1, c2, c3) AS (SELECT * FROM Employees)
395        // Without metadata, c1 traces to Employees.* (the star), not Employees.c1 (which doesn't exist)
396        // The evidence "cte_explicit_column_via_star" indicates this pattern
397        if ("cte_explicit_column_via_star".equals(evidence)) {
398            return null;
399        }
400
401        // If an override table is set (e.g., for traced columns), use it
402        if (overrideTable != null) {
403            return overrideTable;
404        }
405
406        if (sourceNamespace == null) {
407            return null;
408        }
409
410        // For UnionNamespace: UNION columns don't belong to any specific physical table.
411        // They're a combination of multiple branches. UnionNamespace.getFinalTable() returns
412        // the first branch's table which is incorrect for tracking column origins.
413        if (sourceNamespace instanceof UnionNamespace) {
414            return null;
415        }
416
417        // For SubqueryNamespace without override table: if the subquery has multiple tables
418        // AND no qualified star to identify the source, we can't determine which table
419        // the column comes from. Returning the first table would be incorrect.
420        // Example: FROM CDS_H_PARTNER PAR, (SELECT kategorie ... FROM CDS_H_KUNDEN_OBJEKT) subq
421        // But if there's a qualified star like "ta.*", that identifies the source table.
422        //
423        // IMPORTANT: For Teradata, implicit lateral derived tables (auto-added tables when
424        // a column references an undeclared table in WHERE clause) should be excluded from
425        // the multiple-table count. These are syntactic sugar and shouldn't affect column
426        // resolution to the actual source table.
427        if (sourceNamespace instanceof SubqueryNamespace && overrideTable == null) {
428            SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace;
429            gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = subNs.getSubquery();
430            if (subquery != null && subquery.tables != null) {
431                // Count only real tables (excluding implicit lateral derived tables)
432                int realTableCount = countRealTables(subquery.tables);
433                if (realTableCount > 1) {
434                    // Check if there's a qualified star that can identify the source
435                    if (!hasQualifiedStar(subquery)) {
436                        return null;
437                    }
438                }
439            }
440        }
441
442        // For CTENamespace with multiple tables (e.g., JOIN): trace the specific column
443        // to its correct source table using the definitionNode
444        // Example: WITH cte AS (SELECT m.album_id, b.band_name FROM albums m JOIN bands b ...)
445        // When tracing 'band_name', we need to find it comes from 'b' (bands), not 'm' (albums)
446        if (sourceNamespace instanceof CTENamespace) {
447            CTENamespace cteNs = (CTENamespace) sourceNamespace;
448            TTable tracedTable = null;
449
450            if (definitionNode instanceof TResultColumn) {
451                // Direct case: definitionNode is a TResultColumn from the CTE's SELECT list
452                tracedTable = traceColumnThroughCTE(cteNs, (TResultColumn) definitionNode);
453            } else {
454                // Indirect case: The column might be traced through a star column
455                // Try to find the column by name in the CTE chain
456                tracedTable = traceColumnByNameThroughCTE(cteNs, exposedName);
457            }
458
459            if (tracedTable != null) {
460                return tracedTable;
461            }
462
463            // For CTEs with multiple tables, if tracing failed (unqualified column),
464            // return null instead of the first table to avoid incorrect lineage.
465            // Example: WITH cte AS (SELECT musicians.id, musician_name, music_bands.band_name
466            //          FROM musicians JOIN ... JOIN music_bands)
467            // The unqualified 'musician_name' cannot be traced to any specific table
468            // without metadata, so we should NOT guess and pick the first table.
469            TSelectSqlStatement cteSelect = cteNs.getSelectStatement();
470            if (cteSelect != null && cteSelect.tables != null) {
471                int tableCount = countRealTables(cteSelect.tables);
472                if (tableCount > 1) {
473                    // Cannot determine which table - don't guess
474                    return null;
475                }
476            }
477        }
478
479        return sourceNamespace.getFinalTable();
480    }
481
482    /**
483     * Get the original column name in the physical table when this column is an alias.
484     *
485     * <p>When a column is aliased in a subquery (e.g., {@code SELECT t.id AS col1}),
486     * the exposed name is {@code col1} but the original column in the physical table
487     * is {@code id}. This method returns {@code id} so callers can pair the correct
488     * column name with the physical table returned by {@link #getFinalTable()}.</p>
489     *
490     * <p>For multi-level aliases (e.g., {@code SELECT ADCS_MSISDN FROM (SELECT MSISDN AS ADCS_MSISDN FROM t) sub}),
491     * this recursively traces through all levels to find the original column name ({@code MSISDN}).</p>
492     *
493     * @return The original column name if this is an alias, or null if not an alias
494     *         or unable to determine
495     */
496    public String getFinalColumnName() {
497        return getFinalColumnNameInternal(0);
498    }
499
500    /**
501     * Internal recursive implementation of getFinalColumnName with depth limit.
502     */
503    private String getFinalColumnNameInternal(int depth) {
504        if (depth > 10) return null; // safety limit for deeply nested aliases
505
506        if (!isColumnAlias() && !isPassthroughToAlias()) {
507            return null;
508        }
509
510        if (definitionNode == null || !(definitionNode instanceof TResultColumn)) {
511            return null;
512        }
513
514        TResultColumn rc = (TResultColumn) definitionNode;
515        TExpression expr = rc.getExpr();
516        if (expr == null) {
517            return null;
518        }
519
520        TObjectName colRef = null;
521        if (expr.getExpressionType() == EExpressionType.simple_object_name_t) {
522            colRef = expr.getObjectOperand();
523        } else if (expr.getExpressionType() == EExpressionType.sqlserver_proprietary_column_alias_t) {
524            TExpression rightExpr = expr.getRightOperand();
525            if (rightExpr != null && rightExpr.getExpressionType() == EExpressionType.simple_object_name_t) {
526                colRef = rightExpr.getObjectOperand();
527            }
528        }
529
530        if (colRef != null) {
531            // Check if this inner column reference itself has a ColumnSource that's also an alias.
532            // If so, recursively trace to get the deepest original column name.
533            ColumnSource innerSource = colRef.getColumnSource();
534            if (innerSource != null && (innerSource.isColumnAlias() || innerSource.isPassthroughToAlias())) {
535                String deeperName = innerSource.getFinalColumnNameInternal(depth + 1);
536                if (deeperName != null) {
537                    return deeperName;
538                }
539            }
540            return colRef.getColumnNameOnly();
541        }
542        return null;
543    }
544
545    /**
546     * Trace a column by name through a CTE to find its correct source table.
547     * This handles the case when the column is traced through star columns
548     * and we don't have the direct TResultColumn definition.
549     *
550     * @param cteNs The CTENamespace to trace through
551     * @param columnName The name of the column to find
552     * @return The correct source table, or null if unable to determine
553     */
554    private TTable traceColumnByNameThroughCTE(CTENamespace cteNs, String columnName) {
555        if (columnName == null || columnName.isEmpty()) {
556            return null;
557        }
558
559        // Get the CTE's SELECT statement
560        TSelectSqlStatement cteSelect = cteNs.getSelectStatement();
561        if (cteSelect == null) {
562            return null;
563        }
564
565        // First, check if this CTE has explicit columns matching the name
566        TTable result = findColumnInSelectList(cteSelect, columnName);
567        if (result != null) {
568            return result;
569        }
570
571        // If not found directly, check if this CTE uses SELECT * from another CTE
572        if (cteSelect.tables != null) {
573            for (int i = 0; i < cteSelect.tables.size(); i++) {
574                TTable table = cteSelect.tables.getTable(i);
575                if (table == null) continue;
576
577                // If it references another CTE, trace through it
578                if (table.isCTEName() && table.getCTE() != null) {
579                    gudusoft.gsqlparser.nodes.TCTE underlyingCte = table.getCTE();
580                    TSelectSqlStatement underlyingSelect = underlyingCte.getSubquery();
581                    if (underlyingSelect != null) {
582                        result = findColumnInSelectList(underlyingSelect, columnName);
583                        if (result != null) {
584                            return result;
585                        }
586                    }
587                }
588            }
589        }
590
591        return null;
592    }
593
594    /**
595     * Trace a column through a CTE to find its correct source table.
596     * This handles CTEs with JOINs where columns come from different tables,
597     * including CTEs with star columns that reference other CTEs.
598     *
599     * @param cteNs The CTENamespace
600     * @param resultColumn The TResultColumn from the CTE's SELECT list
601     * @return The correct source table, or null if unable to determine
602     */
603    private TTable traceColumnThroughCTE(CTENamespace cteNs, TResultColumn resultColumn) {
604        if (resultColumn == null || resultColumn.getExpr() == null) {
605            return null;
606        }
607
608        TExpression expr = resultColumn.getExpr();
609
610        // Check if the expression is a star column (e.g., SELECT * FROM other_cte)
611        // In this case, we need to trace through to the underlying CTE
612        if (expr.getExpressionType() == EExpressionType.simple_object_name_t) {
613            TObjectName colRef = expr.getObjectOperand();
614            if (colRef != null && "*".equals(colRef.getColumnNameOnly())) {
615                // This is a star column - trace through to find the actual column
616                return traceColumnThroughStarInCTE(cteNs, exposedName);
617            }
618        }
619
620        // Check if the expression is a simple column reference
621        if (expr.getExpressionType() != EExpressionType.simple_object_name_t) {
622            return null;
623        }
624
625        TObjectName colRef = expr.getObjectOperand();
626        if (colRef == null) {
627            return null;
628        }
629
630        // Check if the column has a table qualifier (e.g., "b.band_name")
631        String tableQualifier = colRef.getTableString();
632        if (tableQualifier == null || tableQualifier.isEmpty()) {
633            // No qualifier - can't determine which table
634            return null;
635        }
636
637        // Get the CTE's subquery to find the table with matching alias
638        TSelectSqlStatement cteSubquery = cteNs.getSelectStatement();
639        if (cteSubquery == null || cteSubquery.tables == null) {
640            return null;
641        }
642
643        // Search for the table with matching alias or name
644        for (int i = 0; i < cteSubquery.tables.size(); i++) {
645            TTable table = cteSubquery.tables.getTable(i);
646            if (table == null) continue;
647
648            // Check alias match
649            String alias = table.getAliasName();
650            if (alias != null && alias.equalsIgnoreCase(tableQualifier)) {
651                // Found the table - now trace to its final physical table if needed
652                return traceToPhysicalTable(table);
653            }
654
655            // Check table name match (for unaliased tables)
656            String tableName = table.getTableName() != null ? table.getTableName().toString() : null;
657            if (tableName != null && tableName.equalsIgnoreCase(tableQualifier)) {
658                return traceToPhysicalTable(table);
659            }
660        }
661
662        return null;
663    }
664
665    /**
666     * Trace a specific column through a CTE that uses SELECT *.
667     * This finds the underlying CTE that defines the column and traces it to the correct table.
668     *
669     * @param cteNs The CTE namespace with SELECT *
670     * @param columnName The name of the column to trace
671     * @return The correct source table, or null if unable to determine
672     */
673    private TTable traceColumnThroughStarInCTE(CTENamespace cteNs, String columnName) {
674        if (columnName == null || columnName.isEmpty()) {
675            return null;
676        }
677
678        TSelectSqlStatement cteSubquery = cteNs.getSelectStatement();
679        if (cteSubquery == null || cteSubquery.tables == null) {
680            return null;
681        }
682
683        // Find the underlying CTE or table that the star column references
684        for (int i = 0; i < cteSubquery.tables.size(); i++) {
685            TTable table = cteSubquery.tables.getTable(i);
686            if (table == null) continue;
687
688            // If it's a CTE reference, look for the column in that CTE
689            if (table.isCTEName() && table.getCTE() != null) {
690                gudusoft.gsqlparser.nodes.TCTE underlyingCte = table.getCTE();
691                TSelectSqlStatement underlyingSubquery = underlyingCte.getSubquery();
692                if (underlyingSubquery != null) {
693                    // Look for the column in the underlying CTE's SELECT list
694                    TTable tracedTable = findColumnInSelectList(underlyingSubquery, columnName);
695                    if (tracedTable != null) {
696                        return tracedTable;
697                    }
698                }
699            }
700        }
701
702        return null;
703    }
704
705    /**
706     * Find a column by name in a SELECT list and trace it to its source table.
707     *
708     * @param selectStmt The SELECT statement to search
709     * @param columnName The column name to find
710     * @return The source table for the column, or null if not found
711     */
712    private TTable findColumnInSelectList(TSelectSqlStatement selectStmt, String columnName) {
713        if (selectStmt == null || selectStmt.getResultColumnList() == null) {
714            return null;
715        }
716
717        gudusoft.gsqlparser.nodes.TResultColumnList resultList = selectStmt.getResultColumnList();
718        for (int i = 0; i < resultList.size(); i++) {
719            TResultColumn rc = resultList.getResultColumn(i);
720            if (rc == null) continue;
721
722            // Get the exposed name (alias or column name)
723            String exposedColName = rc.getAliasClause() != null
724                ? rc.getAliasClause().toString()
725                : (rc.getExpr() != null && rc.getExpr().getObjectOperand() != null
726                    ? rc.getExpr().getObjectOperand().getColumnNameOnly()
727                    : null);
728
729            if (exposedColName != null && exposedColName.equalsIgnoreCase(columnName)) {
730                // Found the column - trace it to its source table
731                TExpression expr = rc.getExpr();
732                if (expr != null && expr.getExpressionType() == EExpressionType.simple_object_name_t) {
733                    TObjectName colRef = expr.getObjectOperand();
734                    if (colRef != null) {
735                        String tableQualifier = colRef.getTableString();
736                        if (tableQualifier != null && !tableQualifier.isEmpty()) {
737                            // Find the table with this qualifier in the FROM clause
738                            if (selectStmt.tables != null) {
739                                for (int j = 0; j < selectStmt.tables.size(); j++) {
740                                    TTable table = selectStmt.tables.getTable(j);
741                                    if (table == null) continue;
742
743                                    String alias = table.getAliasName();
744                                    if (alias != null && alias.equalsIgnoreCase(tableQualifier)) {
745                                        return traceToPhysicalTable(table);
746                                    }
747
748                                    String tableName = table.getTableName() != null
749                                        ? table.getTableName().toString() : null;
750                                    if (tableName != null && tableName.equalsIgnoreCase(tableQualifier)) {
751                                        return traceToPhysicalTable(table);
752                                    }
753                                }
754                            }
755                        }
756                    }
757                }
758            }
759        }
760
761        return null;
762    }
763
764    /**
765     * Trace a table to its underlying physical table.
766     * Handles CTEs, subqueries, and JOINs.
767     */
768    private TTable traceToPhysicalTable(TTable table) {
769        if (table == null) {
770            return null;
771        }
772
773        // If it's already a physical table, return it
774        if (table.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !table.isCTEName()) {
775            return table;
776        }
777
778        // If it's a CTE reference, trace through the CTE
779        if (table.isCTEName() && table.getCTE() != null) {
780            // Use a simple approach - get the first physical table from the CTE
781            // This could be enhanced to trace specific columns through nested CTEs
782            gudusoft.gsqlparser.nodes.TCTE nestedCte = table.getCTE();
783            if (nestedCte.getSubquery() != null && nestedCte.getSubquery().tables != null) {
784                for (int i = 0; i < nestedCte.getSubquery().tables.size(); i++) {
785                    TTable nestedTable = nestedCte.getSubquery().tables.getTable(i);
786                    TTable physical = traceToPhysicalTable(nestedTable);
787                    if (physical != null) {
788                        return physical;
789                    }
790                }
791            }
792        }
793
794        // If it's a subquery, trace through it
795        if (table.getSubquery() != null) {
796            SubqueryNamespace nestedNs = new SubqueryNamespace(
797                table.getSubquery(),
798                table.getAliasName(),
799                null  // nameMatcher not needed for simple tracing
800            );
801            return nestedNs.getFinalTable();
802        }
803
804        return null;
805    }
806
807    /**
808     * Trace a column alias through a subquery to find its source table.
809     * When a column is aliased (e.g., SELECT t.id AS col1 FROM my_table t),
810     * the alias changes the column name but the data still comes from the
811     * underlying table. This method traces through to find that table.
812     *
813     * <p>Used by {@link #getTracedFinalTable()} to provide alias-aware lineage
814     * tracing. Handles both qualified (t.id AS col1) and unqualified (id AS col1)
815     * column references, as well as SQL Server proprietary alias syntax.</p>
816     *
817     * @param subNs The SubqueryNamespace containing the aliased column
818     * @return The physical table the column traces to, or null if undetermined
819     */
820    private TTable traceColumnAliasThroughSubquery(SubqueryNamespace subNs) {
821        if (definitionNode == null || !(definitionNode instanceof TResultColumn)) {
822            return null;
823        }
824
825        TResultColumn rc = (TResultColumn) definitionNode;
826        TExpression expr = rc.getExpr();
827        if (expr == null) {
828            return null;
829        }
830
831        TObjectName colRef = null;
832
833        if (expr.getExpressionType() == EExpressionType.simple_object_name_t) {
834            // Standard: SELECT col AS alias
835            colRef = expr.getObjectOperand();
836        } else if (expr.getExpressionType() == EExpressionType.sqlserver_proprietary_column_alias_t) {
837            // SQL Server: SELECT alias = col
838            TExpression rightExpr = expr.getRightOperand();
839            if (rightExpr != null && rightExpr.getExpressionType() == EExpressionType.simple_object_name_t) {
840                colRef = rightExpr.getObjectOperand();
841            }
842        }
843
844        if (colRef == null) {
845            return null;
846        }
847
848        // Use the resolved sourceTable from the inner column reference
849        TTable sourceTable = colRef.getSourceTable();
850        if (sourceTable != null) {
851            return traceToPhysicalTable(sourceTable);
852        }
853
854        // Fallback: if no sourceTable resolved, try to find by table qualifier
855        String tableQualifier = colRef.getTableString();
856        if (tableQualifier != null && !tableQualifier.isEmpty()) {
857            TSelectSqlStatement subquery = subNs.getSubquery();
858            if (subquery != null && subquery.tables != null) {
859                for (int i = 0; i < subquery.tables.size(); i++) {
860                    TTable table = subquery.tables.getTable(i);
861                    if (table == null) continue;
862
863                    String alias = table.getAliasName();
864                    if (alias != null && alias.equalsIgnoreCase(tableQualifier)) {
865                        return traceToPhysicalTable(table);
866                    }
867
868                    String tableName = table.getTableName() != null ? table.getTableName().toString() : null;
869                    if (tableName != null && tableName.equalsIgnoreCase(tableQualifier)) {
870                        return traceToPhysicalTable(table);
871                    }
872                }
873            }
874        }
875
876        return null;
877    }
878
879    /**
880     * Get all physical tables that this column might originate from.
881     *
882     * <p>For columns from UNION queries, this returns tables from ALL branches,
883     * not just the first one. This is essential for proper lineage tracking
884     * where a column like {@code actor_id} in a UNION query should be linked
885     * to {@code actor.actor_id}, {@code actor2.actor_id}, {@code actor3.actor_id}.</p>
886     *
887     * <p>For regular single-table sources, this returns a single-element list
888     * with the same table as {@link #getFinalTable()}.</p>
889     *
890     * @return List of all physical tables, or empty list if unable to determine
891     */
892    public java.util.List<TTable> getAllFinalTables() {
893        // If this ColumnSource has explicit candidateTables set (e.g., from UNION inference),
894        // use those instead of delegating to namespace. This is critical for UNION queries
895        // where only branches with SELECT * should contribute candidate tables for inferred columns.
896        // An EMPTY list means "no matching tables" - return it as-is without delegating.
897        // A NULL means "not determined" - delegate to namespace.
898        if (candidateTables != null) {
899            return candidateTables;
900        }
901
902        if (sourceNamespace == null) {
903            if (overrideTable != null) {
904                return java.util.Collections.singletonList(overrideTable);
905            }
906            return java.util.Collections.emptyList();
907        }
908
909        // For calculated columns and aliases in SubqueryNamespace, don't trace
910        if (sourceNamespace instanceof SubqueryNamespace) {
911            if (isCalculatedColumn() || isColumnAlias()) {
912                return java.util.Collections.emptyList();
913            }
914        }
915
916        // For CTENamespace calculated/alias/explicit columns, trace to CTE itself
917        if (sourceNamespace instanceof CTENamespace) {
918            if (isCalculatedColumn() || isColumnAlias() || isCTEExplicitColumn()) {
919                TTable cteTable = ((CTENamespace) sourceNamespace).getReferencingTable();
920                if (cteTable != null) {
921                    return java.util.Collections.singletonList(cteTable);
922                }
923                return java.util.Collections.emptyList();
924            }
925        }
926
927        // Delegate to namespace - handles UNION queries via UnionNamespace.getAllFinalTables()
928        return sourceNamespace.getAllFinalTables();
929    }
930
931    /**
932     * Check if this column is a passthrough reference to an underlying alias.
933     *
934     * <p>A passthrough column is a simple column reference in a subquery that
935     * references another column from its FROM clause. If that underlying column
936     * is an alias, then this passthrough should not trace to the base table.</p>
937     *
938     * <p>Example: In {@code SELECT stat_typ FROM (SELECT stat_typ = col FROM t) AS b},
939     * the outer {@code stat_typ} is a passthrough to {@code b.stat_typ}, which is an alias.</p>
940     *
941     * @return true if this is a passthrough to an alias
942     */
943    private boolean isPassthroughToAlias() {
944        if (definitionNode == null || !(definitionNode instanceof TResultColumn)) {
945            return false;
946        }
947
948        TResultColumn rc = (TResultColumn) definitionNode;
949        TExpression expr = rc.getExpr();
950        if (expr == null) {
951            return false;
952        }
953
954        // Only check simple column references (passthroughs)
955        if (expr.getExpressionType() != EExpressionType.simple_object_name_t) {
956            return false;
957        }
958
959        // If this column itself has an alias that differs, it's already handled by isColumnAlias()
960        if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) {
961            return false;
962        }
963
964        // Get the column name being referenced
965        gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
966        if (objName == null) {
967            return false;
968        }
969        String columnName = objName.getColumnNameOnly();
970        if (columnName == null || columnName.isEmpty()) {
971            return false;
972        }
973
974        // Resolve this column in the subquery's FROM scope to find the underlying ColumnSource
975        if (sourceNamespace instanceof SubqueryNamespace) {
976            SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace;
977            ColumnSource underlyingSource = subNs.resolveColumnInFromScope(columnName);
978            if (underlyingSource != null) {
979                // Check if the underlying column is an alias or calculated
980                if (underlyingSource.isColumnAlias() || underlyingSource.isCalculatedColumn()) {
981                    return true;
982                }
983                // Recursively check if it's a passthrough to alias
984                if (underlyingSource.isPassthroughToAlias()) {
985                    return true;
986                }
987            }
988        }
989
990        return false;
991    }
992
993    /**
994     * Check if this subquery column is a passthrough reference to a calculated column.
995     *
996     * <p>A subquery column is a passthrough to calculated if:</p>
997     * <ol>
998     *   <li>The column definition is a simple column reference (e.g., kko_lfz_9)</li>
999     *   <li>The referenced column in the FROM scope is calculated (CASE, function, etc.)</li>
1000     * </ol>
1001     *
1002     * <p>Example:</p>
1003     * <pre>
1004     * SELECT kko_lfz_9 AS KKO_LFZ_9
1005     * FROM (SELECT CASE WHEN... END AS kko_lfz_9 FROM t) subq
1006     * </pre>
1007     * <p>Here, kko_lfz_9 in the outer query is a passthrough to a calculated column in subq.</p>
1008     *
1009     * <p>This differs from {@link #isPassthroughToAlias()} which skips columns with aliases.
1010     * Here we check even aliased passthroughs to see if they reference calculated columns.</p>
1011     *
1012     * @return true if this is a passthrough to a calculated column in a subquery
1013     */
1014    private boolean isPassthroughToCalculatedInSubquery() {
1015        if (definitionNode == null || !(definitionNode instanceof TResultColumn)) {
1016            return false;
1017        }
1018
1019        TResultColumn rc = (TResultColumn) definitionNode;
1020        TExpression expr = rc.getExpr();
1021        if (expr == null) {
1022            return false;
1023        }
1024
1025        // Only check simple column references (passthroughs)
1026        if (expr.getExpressionType() != EExpressionType.simple_object_name_t) {
1027            return false;
1028        }
1029
1030        // Get the column name being referenced
1031        gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
1032        if (objName == null) {
1033            return false;
1034        }
1035        String columnName = objName.getColumnNameOnly();
1036        if (columnName == null || columnName.isEmpty()) {
1037            return false;
1038        }
1039
1040        // Resolve this column in the subquery's FROM scope to find the underlying ColumnSource
1041        if (sourceNamespace instanceof SubqueryNamespace) {
1042            SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace;
1043            ColumnSource underlyingSource = subNs.resolveColumnInFromScope(columnName);
1044            if (underlyingSource != null) {
1045                // Check if the underlying column is calculated
1046                if (underlyingSource.isCalculatedColumn()) {
1047                    return true;
1048                }
1049                // Recursively check if it's a passthrough to calculated
1050                if (underlyingSource.isPassthroughToCalculatedInSubquery()) {
1051                    return true;
1052                }
1053            }
1054        }
1055
1056        return false;
1057    }
1058
1059    /**
1060     * Check if this CTE column is a passthrough reference to a calculated column in a subquery or nested CTE.
1061     *
1062     * <p>A CTE column is a passthrough to calculated if:</p>
1063     * <ol>
1064     *   <li>The column definition is a simple qualified column reference (e.g., subq.calc_col or cte.calc_col)</li>
1065     *   <li>The qualifier refers to a subquery or CTE in the CTE's body</li>
1066     *   <li>The referenced column in that subquery/CTE is calculated (CASE, function, etc.)</li>
1067     * </ol>
1068     *
1069     * <p>Example with subquery:</p>
1070     * <pre>
1071     * WITH DataCTE AS (
1072     *   SELECT ErrorCountsCTE.ErrorSeverityCategory  -- passthrough
1073     *   FROM (SELECT CASE...END AS ErrorSeverityCategory FROM t) ErrorCountsCTE
1074     * )
1075     * </pre>
1076     *
1077     * <p>Example with nested CTE:</p>
1078     * <pre>
1079     * WITH attendance_summary AS (
1080     *   SELECT date_trunc('month', attendance_date) as month FROM attendance
1081     * )
1082     * WITH outer_cte AS (
1083     *   SELECT a.month FROM attendance_summary a  -- passthrough to calculated in nested CTE
1084     * )
1085     * </pre>
1086     *
1087     * @return true if this is a passthrough to a calculated column in a CTE
1088     */
1089    private boolean isPassthroughToCalculatedInCTE() {
1090        if (definitionNode == null || !(definitionNode instanceof TResultColumn)) {
1091            return false;
1092        }
1093
1094        TResultColumn rc = (TResultColumn) definitionNode;
1095        TExpression expr = rc.getExpr();
1096        if (expr == null) {
1097            return false;
1098        }
1099
1100        // Only check simple qualified column references (passthroughs like subq.column)
1101        if (expr.getExpressionType() != EExpressionType.simple_object_name_t) {
1102            return false;
1103        }
1104
1105        // Get the column reference
1106        gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
1107        if (objName == null) {
1108            return false;
1109        }
1110
1111        // Must have a table qualifier (e.g., "ErrorCountsCTE" in "ErrorCountsCTE.ErrorSeverityCategory")
1112        String tableQualifier = objName.getTableString();
1113        if (tableQualifier == null || tableQualifier.isEmpty()) {
1114            return false;
1115        }
1116
1117        String columnName = objName.getColumnNameOnly();
1118        if (columnName == null || columnName.isEmpty()) {
1119            return false;
1120        }
1121
1122        // Get the CTE's subquery to find the referenced subquery alias
1123        if (!(sourceNamespace instanceof CTENamespace)) {
1124            return false;
1125        }
1126
1127        CTENamespace cteNs = (CTENamespace) sourceNamespace;
1128        gudusoft.gsqlparser.nodes.TCTE cte = cteNs.getCTE();
1129        if (cte == null || cte.getSubquery() == null) {
1130            return false;
1131        }
1132
1133        // Find the subquery/table with this alias in the CTE's body
1134        gudusoft.gsqlparser.stmt.TSelectSqlStatement cteBody = cte.getSubquery();
1135        TTable referencedTable = findTableByAlias(cteBody, tableQualifier);
1136        if (referencedTable == null) {
1137            return false;
1138        }
1139
1140        // Case 1: Referenced table is a subquery
1141        if (referencedTable.getSubquery() != null) {
1142            gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = referencedTable.getSubquery();
1143            return isCalculatedColumnInSelect(subquery, columnName);
1144        }
1145
1146        // Case 2: Referenced table is a CTE reference
1147        if (referencedTable.isCTEName() && referencedTable.getCTE() != null) {
1148            gudusoft.gsqlparser.nodes.TCTE referencedCTE = referencedTable.getCTE();
1149            if (referencedCTE.getSubquery() != null) {
1150                return isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName);
1151            }
1152        }
1153
1154        return false;
1155    }
1156
1157    /**
1158     * Find a table in a SELECT statement by its alias.
1159     */
1160    private TTable findTableByAlias(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String alias) {
1161        if (select == null || select.tables == null || alias == null) {
1162            return null;
1163        }
1164
1165        for (int i = 0; i < select.tables.size(); i++) {
1166            TTable table = select.tables.getTable(i);
1167            if (table != null) {
1168                String tableAlias = table.getAliasName();
1169                if (tableAlias != null && tableAlias.equalsIgnoreCase(alias)) {
1170                    return table;
1171                }
1172                // Also check table name for non-aliased references
1173                if (tableAlias == null && table.getTableName() != null) {
1174                    String tableName = table.getTableName().toString();
1175                    if (tableName != null && tableName.equalsIgnoreCase(alias)) {
1176                        return table;
1177                    }
1178                }
1179            }
1180        }
1181        return null;
1182    }
1183
1184    /**
1185     * Check if a column in a SELECT statement is calculated (not a simple column reference).
1186     */
1187    private boolean isCalculatedColumnInSelect(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) {
1188        if (select == null || select.getResultColumnList() == null || columnName == null) {
1189            return false;
1190        }
1191
1192        for (int i = 0; i < select.getResultColumnList().size(); i++) {
1193            TResultColumn rc = select.getResultColumnList().getResultColumn(i);
1194            if (rc == null) continue;
1195
1196            // Get the column name for this result column
1197            String rcName = null;
1198            if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) {
1199                rcName = rc.getAliasClause().getAliasName().toString();
1200            } else if (rc.getExpr() != null &&
1201                       rc.getExpr().getExpressionType() == EExpressionType.simple_object_name_t &&
1202                       rc.getExpr().getObjectOperand() != null) {
1203                rcName = rc.getExpr().getObjectOperand().getColumnNameOnly();
1204            }
1205
1206            if (rcName != null && rcName.equalsIgnoreCase(columnName)) {
1207                // Found the column - check if it's calculated
1208                TExpression expr = rc.getExpr();
1209                if (expr != null && expr.getExpressionType() != EExpressionType.simple_object_name_t) {
1210                    // Non-simple expression = calculated
1211                    return true;
1212                }
1213            }
1214        }
1215        return false;
1216    }
1217
1218    /**
1219     * Check if this column source represents a calculated expression.
1220     *
1221     * <p>A column is calculated if its definition is a TResultColumn with
1222     * a non-simple expression (not a direct column reference or star).</p>
1223     *
1224     * <p>For inferred columns (via star expansion), we trace back to the
1225     * source CTE/subquery to check if the original column is calculated.</p>
1226     *
1227     * @return true if this is a calculated column
1228     */
1229    public boolean isCalculatedColumn() {
1230        if (definitionNode == null) {
1231            // For inferred columns through star expansion, check if the underlying
1232            // column in the source CTE/subquery is calculated
1233            return isInferredFromCalculatedColumn();
1234        }
1235
1236        if (!(definitionNode instanceof TResultColumn)) {
1237            return false;
1238        }
1239
1240        TResultColumn rc = (TResultColumn) definitionNode;
1241        TExpression expr = rc.getExpr();
1242        if (expr == null) {
1243            return false;
1244        }
1245
1246        EExpressionType exprType = expr.getExpressionType();
1247
1248        // Simple column reference - NOT calculated (passthrough)
1249        if (exprType == EExpressionType.simple_object_name_t) {
1250            return false;
1251        }
1252
1253        // Star column - NOT calculated (passthrough)
1254        String colText = rc.toString();
1255        if (colText != null && colText.endsWith("*")) {
1256            return false;
1257        }
1258
1259        // SQL Server proprietary column alias (col = expr)
1260        if (exprType == EExpressionType.sqlserver_proprietary_column_alias_t) {
1261            if (expr.getRightOperand() != null &&
1262                expr.getRightOperand().getExpressionType() == EExpressionType.simple_object_name_t) {
1263                return false;
1264            }
1265        }
1266
1267        // Any other expression type is calculated
1268        return true;
1269    }
1270
1271    /**
1272     * Check if this is an inferred column (via star expansion) that originates from
1273     * a calculated column in the source CTE/subquery.
1274     *
1275     * <p>When a column is resolved through star expansion (e.g., SELECT * FROM CTE),
1276     * the definitionNode is null. We need to trace back to the source namespace
1277     * to check if the original column is calculated.</p>
1278     *
1279     * @return true if this inferred column traces back to a calculated column
1280     */
1281    private boolean isInferredFromCalculatedColumn() {
1282        // Only check for inferred columns (evidence contains "auto_inferred")
1283        if (evidence == null || !evidence.contains("auto_inferred")) {
1284            return false;
1285        }
1286
1287        // Need the source namespace and column name to trace
1288        if (sourceNamespace == null || exposedName == null) {
1289            return false;
1290        }
1291
1292        // For CTE namespace, check if the column is calculated in the CTE's SELECT list
1293        if (sourceNamespace instanceof CTENamespace) {
1294            CTENamespace cteNs = (CTENamespace) sourceNamespace;
1295            gudusoft.gsqlparser.nodes.TCTE cte = cteNs.getCTE();
1296            if (cte != null && cte.getSubquery() != null) {
1297                // First check the CTE's direct SELECT list
1298                if (isCalculatedColumnInSelect(cte.getSubquery(), exposedName)) {
1299                    return true;
1300                }
1301
1302                // If the CTE has a star column, trace through to referenced CTEs
1303                if (cteNs.hasStarColumn()) {
1304                    return isCalculatedInCTEChain(cte.getSubquery(), exposedName);
1305                }
1306            }
1307        }
1308
1309        // For Subquery namespace, check if the column is calculated in the subquery's SELECT list
1310        if (sourceNamespace instanceof SubqueryNamespace) {
1311            SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace;
1312            gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = subNs.getSubquery();
1313            if (subquery != null) {
1314                // First check the subquery's direct SELECT list
1315                if (isCalculatedColumnInSelect(subquery, exposedName)) {
1316                    return true;
1317                }
1318
1319                // If the subquery has a star column, trace through to source tables
1320                if (subNs.hasStarColumn()) {
1321                    return isCalculatedInSubqueryChain(subquery, exposedName);
1322                }
1323            }
1324        }
1325
1326        return false;
1327    }
1328
1329    /**
1330     * Check if a column is calculated by tracing through CTE references.
1331     * This handles cases like Stage4 -> Stage3 -> Stage2 where the column
1332     * is calculated at some intermediate level.
1333     */
1334    private boolean isCalculatedInCTEChain(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) {
1335        if (select == null || select.tables == null) {
1336            return false;
1337        }
1338
1339        // Look for CTE references in the FROM clause
1340        for (int i = 0; i < select.tables.size(); i++) {
1341            TTable table = select.tables.getTable(i);
1342            if (table != null && table.isCTEName() && table.getCTE() != null) {
1343                gudusoft.gsqlparser.nodes.TCTE referencedCTE = table.getCTE();
1344                if (referencedCTE.getSubquery() != null) {
1345                    // Check if the column is calculated in this CTE
1346                    if (isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName)) {
1347                        return true;
1348                    }
1349                    // Recursively check the CTE chain
1350                    if (isCalculatedInCTEChain(referencedCTE.getSubquery(), columnName)) {
1351                        return true;
1352                    }
1353                }
1354            }
1355        }
1356        return false;
1357    }
1358
1359    /**
1360     * Check if a column is calculated by tracing through subquery references.
1361     */
1362    private boolean isCalculatedInSubqueryChain(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) {
1363        if (select == null || select.tables == null) {
1364            return false;
1365        }
1366
1367        // Look for subquery tables in the FROM clause
1368        for (int i = 0; i < select.tables.size(); i++) {
1369            TTable table = select.tables.getTable(i);
1370            if (table != null && table.getSubquery() != null) {
1371                gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = table.getSubquery();
1372                // Check if the column is calculated in this subquery
1373                if (isCalculatedColumnInSelect(subquery, columnName)) {
1374                    return true;
1375                }
1376                // Recursively check the subquery chain
1377                if (isCalculatedInSubqueryChain(subquery, columnName)) {
1378                    return true;
1379                }
1380            }
1381            // Also check CTE references within subqueries
1382            if (table != null && table.isCTEName() && table.getCTE() != null) {
1383                gudusoft.gsqlparser.nodes.TCTE referencedCTE = table.getCTE();
1384                if (referencedCTE.getSubquery() != null) {
1385                    if (isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName)) {
1386                        return true;
1387                    }
1388                    if (isCalculatedInCTEChain(referencedCTE.getSubquery(), columnName)) {
1389                        return true;
1390                    }
1391                }
1392            }
1393        }
1394        return false;
1395    }
1396
1397    /**
1398     * Check if this column source represents a column alias (renamed column).
1399     *
1400     * <p>A column is an alias if it's a simple column reference in a subquery
1401     * that has been given a different name via AS or NAMED. For example:</p>
1402     * <ul>
1403     *   <li>{@code SELECT col AS alias FROM table} - alias is different from col</li>
1404     *   <li>{@code SELECT col (NAMED alias) FROM table} - Teradata NAMED syntax</li>
1405     *   <li>{@code SELECT alias = col FROM table} - SQL Server proprietary syntax</li>
1406     * </ul>
1407     *
1408     * <p>Column aliases are traced through in {@link #getFinalTable()} to find the
1409     * physical table the data originates from, since the alias only renames the column
1410     * but the data still comes from the physical table.</p>
1411     *
1412     * @return true if this is a column alias with a different name than the original
1413     */
1414    public boolean isColumnAlias() {
1415        if (definitionNode == null) {
1416            return false;
1417        }
1418
1419        if (!(definitionNode instanceof TResultColumn)) {
1420            return false;
1421        }
1422
1423        TResultColumn rc = (TResultColumn) definitionNode;
1424        TExpression expr = rc.getExpr();
1425        if (expr == null) {
1426            return false;
1427        }
1428
1429        EExpressionType exprType = expr.getExpressionType();
1430
1431        // Handle SQL Server proprietary alias syntax: alias = column
1432        // Example: stat_typ = stellplatz_typ
1433        if (exprType == EExpressionType.sqlserver_proprietary_column_alias_t) {
1434            TExpression rightExpr = expr.getRightOperand();
1435            TExpression leftExpr = expr.getLeftOperand();
1436            // Only if right side is a simple column reference
1437            if (rightExpr != null && leftExpr != null &&
1438                rightExpr.getExpressionType() == EExpressionType.simple_object_name_t) {
1439                gudusoft.gsqlparser.nodes.TObjectName rightObjName = rightExpr.getObjectOperand();
1440                gudusoft.gsqlparser.nodes.TObjectName leftObjName = leftExpr.getObjectOperand();
1441                if (rightObjName != null && leftObjName != null) {
1442                    String origName = rightObjName.getColumnNameOnly();
1443                    String aliasName = leftObjName.getColumnNameOnly();
1444                    // If alias name differs from original column name, it's an alias
1445                    if (origName != null && aliasName != null &&
1446                        !origName.equalsIgnoreCase(aliasName)) {
1447                        return true;
1448                    }
1449                }
1450            }
1451            return false;
1452        }
1453
1454        // Standard alias syntax: column AS alias
1455        // Only applies to simple column references
1456        if (exprType != EExpressionType.simple_object_name_t) {
1457            return false;
1458        }
1459
1460        // Check if there's an alias that differs from the column name
1461        if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) {
1462            String aliasName = rc.getAliasClause().getAliasName().toString();
1463            if (aliasName != null && !aliasName.isEmpty()) {
1464                gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
1465                if (objName != null) {
1466                    String origName = objName.getColumnNameOnly();
1467                    // If alias name differs from original name, it's an alias
1468                    if (origName != null && !origName.equalsIgnoreCase(aliasName)) {
1469                        return true;
1470                    }
1471                }
1472            }
1473        }
1474
1475        return false;
1476    }
1477
1478    /**
1479     * Check if this column is a CTE explicit column with a different name than the underlying column.
1480     *
1481     * <p>A CTE explicit column is one defined in the CTE's column list that maps to a
1482     * different column name in the CTE's SELECT list. For example:</p>
1483     * <pre>
1484     * WITH cte(c1, c2) AS (SELECT id, name FROM users)
1485     * SELECT c1 FROM cte  -- c1 maps to 'id', names differ
1486     * </pre>
1487     *
1488     * <p>CTE explicit columns should NOT trace to base tables because the explicit
1489     * column name (c1) doesn't exist as an actual column in the base table (users).</p>
1490     *
1491     * @return true if this is a CTE explicit column with a different name
1492     */
1493    public boolean isCTEExplicitColumn() {
1494        // Must be from a CTENamespace
1495        if (!(sourceNamespace instanceof CTENamespace)) {
1496            return false;
1497        }
1498
1499        // Check evidence for explicit column marker
1500        if (!"cte_explicit_column".equals(evidence)) {
1501            return false;
1502        }
1503
1504        // Get the underlying column name from the definition node
1505        if (definitionNode == null || !(definitionNode instanceof TResultColumn)) {
1506            return false;
1507        }
1508
1509        TResultColumn rc = (TResultColumn) definitionNode;
1510        TExpression expr = rc.getExpr();
1511        if (expr == null) {
1512            return false;
1513        }
1514
1515        // Get the column name from the SELECT list item
1516        String underlyingName = null;
1517
1518        // Check for alias first
1519        if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) {
1520            underlyingName = rc.getAliasClause().getAliasName().toString();
1521        }
1522        // Then check for simple column reference
1523        else if (expr.getExpressionType() == EExpressionType.simple_object_name_t &&
1524                 expr.getObjectOperand() != null) {
1525            underlyingName = expr.getObjectOperand().getColumnNameOnly();
1526        }
1527
1528        // If we can't determine the underlying name, assume it's different
1529        // (calculated expressions, etc. are definitely different from explicit column names)
1530        if (underlyingName == null) {
1531            return true;
1532        }
1533
1534        // If the exposed name differs from the underlying column name, it's an explicit column rename
1535        return !exposedName.equalsIgnoreCase(underlyingName);
1536    }
1537
1538    /**
1539     * Get the override table, if set.
1540     */
1541    public TTable getOverrideTable() {
1542        return overrideTable;
1543    }
1544
1545    /**
1546     * Get the candidate tables for ambiguous columns.
1547     *
1548     * <p>When a column could come from multiple tables (e.g., SELECT * FROM t1, t2),
1549     * this returns all possible source tables. End users can iterate through this
1550     * list to understand all potential sources for the column.</p>
1551     *
1552     * @return List of candidate tables, or empty list if not ambiguous
1553     */
1554    public List<TTable> getCandidateTables() {
1555        return candidateTables != null ? candidateTables : Collections.emptyList();
1556    }
1557
1558    /**
1559     * Check if this column has multiple candidate tables (is ambiguous).
1560     *
1561     * @return true if there are multiple candidate tables
1562     */
1563    public boolean isAmbiguous() {
1564        return candidateTables != null && candidateTables.size() > 1;
1565    }
1566
1567    /**
1568     * Get the field path for deep/record field access.
1569     *
1570     * <p>When a column reference includes field access beyond the base column,
1571     * this returns the field path. For example, in {@code customer.address.city},
1572     * if base column is {@code customer}, this returns a FieldPath with
1573     * segments {@code ["address", "city"]}.</p>
1574     *
1575     * @return The field path, or null if no field access
1576     */
1577    public FieldPath getFieldPath() {
1578        return fieldPath;
1579    }
1580
1581    /**
1582     * Check if this column source has a field path (deep/record field access).
1583     *
1584     * @return true if a non-empty field path exists
1585     */
1586    public boolean hasFieldPath() {
1587        return fieldPath != null && !fieldPath.isEmpty();
1588    }
1589
1590    /**
1591     * Check if this is a struct field access (has evidence "struct_field_access").
1592     *
1593     * <p>This is a convenience method for checking if this column source represents
1594     * a struct/record field dereference operation.</p>
1595     *
1596     * @return true if this is a struct field access
1597     */
1598    public boolean isStructFieldAccess() {
1599        return "struct_field_access".equals(evidence);
1600    }
1601
1602    /**
1603     * Checks if this is a definite resolution (confidence = 1.0)
1604     */
1605    public boolean isDefinite() {
1606        return confidence >= 1.0;
1607    }
1608
1609    /**
1610     * Checks if this is an inferred resolution (confidence < 1.0)
1611     */
1612    public boolean isInferred() {
1613        return confidence < 1.0;
1614    }
1615
1616    @Override
1617    public String toString() {
1618        StringBuilder sb = new StringBuilder();
1619        sb.append(exposedName);
1620        if (sourceNamespace != null) {
1621            sb.append(" from ").append(sourceNamespace.getDisplayName());
1622        }
1623        if (confidence < 1.0) {
1624            sb.append(String.format(" (confidence: %.2f)", confidence));
1625        }
1626        return sb.toString();
1627    }
1628
1629    /**
1630     * Creates a copy with updated confidence and evidence.
1631     * Used when merging or updating inference results.
1632     *
1633     * @deprecated Use {@link #withEvidence(ResolutionEvidence)} instead
1634     */
1635    public ColumnSource withConfidence(double newConfidence, String newEvidence) {
1636        return new ColumnSource(
1637            this.sourceNamespace,
1638            this.exposedName,
1639            this.definitionNode,
1640            newConfidence,
1641            newEvidence,
1642            this.overrideTable,
1643            this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null,
1644            null, // will create from legacy evidence
1645            this.fieldPath
1646        );
1647    }
1648
1649    /**
1650     * Creates a copy with updated ResolutionEvidence.
1651     * This is the preferred method for updating evidence in new code.
1652     *
1653     * @param newEvidence The new evidence detail
1654     * @return A new ColumnSource with updated evidence
1655     */
1656    public ColumnSource withEvidence(ResolutionEvidence newEvidence) {
1657        return new ColumnSource(
1658            this.sourceNamespace,
1659            this.exposedName,
1660            this.definitionNode,
1661            newEvidence != null ? newEvidence.getWeight() : this.confidence,
1662            newEvidence != null ? newEvidence.toLegacyEvidence() : this.evidence,
1663            this.overrideTable,
1664            this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null,
1665            newEvidence,
1666            this.fieldPath
1667        );
1668    }
1669
1670    /**
1671     * Creates a copy with candidate tables.
1672     * Used when a column could come from multiple tables.
1673     */
1674    public ColumnSource withCandidateTables(List<TTable> candidates) {
1675        return new ColumnSource(
1676            this.sourceNamespace,
1677            this.exposedName,
1678            this.definitionNode,
1679            this.confidence,
1680            this.evidence,
1681            this.overrideTable,
1682            candidates != null ? new java.util.ArrayList<>(candidates) : null,
1683            this.evidenceDetail,
1684            this.fieldPath
1685        );
1686    }
1687
1688    /**
1689     * Creates a copy with a field path for deep/record field access.
1690     *
1691     * <p>This method is used when resolving struct/record field access patterns
1692     * like {@code customer.address.city}. The base column is preserved as the
1693     * exposedName, and the field path captures the remaining segments.</p>
1694     *
1695     * @param newFieldPath The field path segments (beyond the base column)
1696     * @return A new ColumnSource with the field path set
1697     */
1698    public ColumnSource withFieldPath(FieldPath newFieldPath) {
1699        return new ColumnSource(
1700            this.sourceNamespace,
1701            this.exposedName,
1702            this.definitionNode,
1703            this.confidence,
1704            this.evidence,
1705            this.overrideTable,
1706            this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null,
1707            this.evidenceDetail,
1708            newFieldPath
1709        );
1710    }
1711
1712    /**
1713     * Creates a copy with a field path from a list of segments.
1714     *
1715     * <p>Convenience method for creating a ColumnSource with a field path
1716     * from a list of string segments.</p>
1717     *
1718     * @param segments The field path segments
1719     * @return A new ColumnSource with the field path set
1720     */
1721    public ColumnSource withFieldPath(List<String> segments) {
1722        return withFieldPath(FieldPath.of(segments));
1723    }
1724
1725    /**
1726     * Creates a copy with field path and updated evidence.
1727     *
1728     * <p>This method is used when resolving struct field access, combining
1729     * both the field path and the struct_field_access evidence marker.</p>
1730     *
1731     * @param newFieldPath The field path segments
1732     * @param newEvidence The evidence string (e.g., "struct_field_access")
1733     * @return A new ColumnSource with field path and evidence updated
1734     */
1735    public ColumnSource withFieldPath(FieldPath newFieldPath, String newEvidence) {
1736        return new ColumnSource(
1737            this.sourceNamespace,
1738            this.exposedName,
1739            this.definitionNode,
1740            this.confidence,
1741            newEvidence,
1742            this.overrideTable,
1743            this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null,
1744            null, // will create from legacy evidence
1745            newFieldPath
1746        );
1747    }
1748
1749    /**
1750     * Count the number of "real" tables in a table list, excluding implicit lateral derived tables.
1751     *
1752     * <p>Teradata supports implicit lateral derived tables, which are auto-added when a column
1753     * references an undeclared table in the WHERE clause. These should not be counted when
1754     * determining if a subquery has multiple tables for column resolution purposes.</p>
1755     *
1756     * @param tables The table list to count
1757     * @return The number of real (non-implicit) tables
1758     */
1759    private static int countRealTables(gudusoft.gsqlparser.nodes.TTableList tables) {
1760        if (tables == null) {
1761            return 0;
1762        }
1763        int count = 0;
1764        for (int i = 0; i < tables.size(); i++) {
1765            TTable table = tables.getTable(i);
1766            if (table != null && table.getEffectType() != gudusoft.gsqlparser.ETableEffectType.tetImplicitLateralDerivedTable) {
1767                count++;
1768            }
1769        }
1770        return count;
1771    }
1772
1773    /**
1774     * Check if a SELECT statement has a qualified star column (e.g., ta.*, tb.*).
1775     * Qualified stars identify which table columns come from in multi-table subqueries.
1776     */
1777    private static boolean hasQualifiedStar(gudusoft.gsqlparser.stmt.TSelectSqlStatement select) {
1778        if (select == null || select.getResultColumnList() == null) {
1779            return false;
1780        }
1781        gudusoft.gsqlparser.nodes.TResultColumnList resultCols = select.getResultColumnList();
1782        for (int i = 0; i < resultCols.size(); i++) {
1783            TResultColumn rc = resultCols.getResultColumn(i);
1784            if (rc != null) {
1785                String colStr = rc.toString().trim();
1786                // Qualified star has format "alias.*" or "table.*"
1787                if (colStr.endsWith("*") && colStr.contains(".")) {
1788                    return true;
1789                }
1790            }
1791        }
1792        return false;
1793    }
1794
1795    /**
1796     * Check if a column exists in a table's DDL definition.
1797     *
1798     * <p>This method checks the table's column definitions (from CREATE TABLE statements
1799     * parsed in the same script) to verify if the column name is defined.</p>
1800     *
1801     * @param table The table to check
1802     * @param columnName The column name to look for
1803     * @return true if the column exists in the table's DDL, false if not found or no DDL available
1804     */
1805    public static boolean isColumnInTableDdl(TTable table, String columnName) {
1806        if (table == null || columnName == null || columnName.isEmpty()) {
1807            return false;
1808        }
1809
1810        // Check if the table has column definitions (from CREATE TABLE DDL)
1811        gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions();
1812        if (columnDefs != null && columnDefs.size() > 0) {
1813            for (int i = 0; i < columnDefs.size(); i++) {
1814                gudusoft.gsqlparser.nodes.TColumnDefinition colDef = columnDefs.getColumn(i);
1815                if (colDef != null && colDef.getColumnName() != null) {
1816                    String defColName = colDef.getColumnName().toString();
1817                    if (defColName != null && defColName.equalsIgnoreCase(columnName)) {
1818                        return true;
1819                    }
1820                }
1821            }
1822            // DDL exists but column not found
1823            return false;
1824        }
1825
1826        // No DDL available - return false (cannot verify)
1827        return false;
1828    }
1829
1830    /**
1831     * Check if a table has DDL metadata available (from CREATE TABLE in same script).
1832     *
1833     * @param table The table to check
1834     * @return true if DDL metadata is available for this table
1835     */
1836    public static boolean hasTableDdl(TTable table) {
1837        if (table == null) {
1838            return false;
1839        }
1840        gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions();
1841        return columnDefs != null && columnDefs.size() > 0;
1842    }
1843
1844    /**
1845     * Check DDL verification status for a candidate table.
1846     *
1847     * <p>Returns a tri-state result:</p>
1848     * <ul>
1849     *   <li>1 = Column exists in table's DDL</li>
1850     *   <li>0 = Column NOT found in table's DDL (DDL available but column missing)</li>
1851     *   <li>-1 = Cannot verify (no DDL available for this table)</li>
1852     * </ul>
1853     *
1854     * @param table The candidate table to check
1855     * @param columnName The column name to verify
1856     * @return DDL verification status: 1 (exists), 0 (not found), -1 (no DDL)
1857     */
1858    public static int getDdlVerificationStatus(TTable table, String columnName) {
1859        if (table == null || columnName == null) {
1860            return -1;
1861        }
1862
1863        gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions();
1864        if (columnDefs == null || columnDefs.size() == 0) {
1865            return -1; // No DDL available
1866        }
1867
1868        // DDL available - check if column exists
1869        for (int i = 0; i < columnDefs.size(); i++) {
1870            gudusoft.gsqlparser.nodes.TColumnDefinition colDef = columnDefs.getColumn(i);
1871            if (colDef != null && colDef.getColumnName() != null) {
1872                String defColName = colDef.getColumnName().toString();
1873                if (defColName != null && defColName.equalsIgnoreCase(columnName)) {
1874                    return 1; // Column exists in DDL
1875                }
1876            }
1877        }
1878
1879        return 0; // DDL exists but column not found
1880    }
1881
1882    /**
1883     * Get DDL verification status for all candidate tables.
1884     *
1885     * <p>Returns a map from each candidate table to its DDL verification status:</p>
1886     * <ul>
1887     *   <li>1 = Column exists in table's DDL</li>
1888     *   <li>0 = Column NOT found in table's DDL</li>
1889     *   <li>-1 = Cannot verify (no DDL available)</li>
1890     * </ul>
1891     *
1892     * @return Map of candidate tables to their DDL verification status, or empty map if no candidates
1893     */
1894    public java.util.Map<TTable, Integer> getCandidateTableDdlStatus() {
1895        java.util.Map<TTable, Integer> result = new java.util.LinkedHashMap<>();
1896        if (candidateTables == null || candidateTables.isEmpty() || exposedName == null) {
1897            return result;
1898        }
1899
1900        for (TTable candidate : candidateTables) {
1901            int status = getDdlVerificationStatus(candidate, exposedName);
1902            result.put(candidate, status);
1903        }
1904        return result;
1905    }
1906}