001package gudusoft.gsqlparser.resolver2.model; 002 003import gudusoft.gsqlparser.EExpressionType; 004import gudusoft.gsqlparser.nodes.TExpression; 005import gudusoft.gsqlparser.nodes.TObjectName; 006import gudusoft.gsqlparser.nodes.TParseTreeNode; 007import gudusoft.gsqlparser.nodes.TResultColumn; 008import gudusoft.gsqlparser.nodes.TTable; 009import gudusoft.gsqlparser.resolver2.inference.EvidenceType; 010import gudusoft.gsqlparser.resolver2.namespace.INamespace; 011import gudusoft.gsqlparser.resolver2.namespace.SubqueryNamespace; 012import gudusoft.gsqlparser.resolver2.namespace.CTENamespace; 013import gudusoft.gsqlparser.resolver2.namespace.UnionNamespace; 014import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 015 016import java.util.Collections; 017import java.util.List; 018 019/** 020 * Represents the source of a column reference. 021 * Tracks where a column comes from, including intermediate transformations 022 * through subqueries and CTEs. 023 * 024 * Design principles: 025 * 1. Immutable - once created, cannot be modified 026 * 2. Recursive - can trace back through subquery/CTE layers 027 * 3. Confidence-scored - supports evidence-based inference 028 */ 029public class ColumnSource { 030 /** The namespace where this column is exposed (e.g., subquery, table) */ 031 private final INamespace sourceNamespace; 032 033 /** The name by which this column is exposed in the namespace */ 034 private final String exposedName; 035 036 /** The AST node where this column is defined (TResultColumn, TTableColumn, etc.) */ 037 private final TParseTreeNode definitionNode; 038 039 /** Location information for the definition */ 040 private final SourceLocation definitionLocation; 041 042 /** 043 * Confidence score [0.0, 1.0]: 044 * - 1.0: Definite (from metadata or explicit definition) 045 * - 0.7-0.9: High confidence inference (strong evidence) 046 * - 0.5-0.7: Medium confidence inference (some evidence) 047 * - 0.0-0.5: Low confidence guess 048 */ 049 private final double confidence; 050 051 /** 052 * Evidence that supports this resolution. 053 * Used for debugging and explaining inference decisions. 054 * 055 * @deprecated Use {@link #evidenceDetail} instead. This field is kept for backward 056 * compatibility and will be derived from evidenceDetail if not explicitly set. 057 */ 058 private final String evidence; 059 060 /** 061 * Structured evidence detail for this resolution. 062 * Provides type-safe evidence with confidence weight and source traceability. 063 * This is the preferred way to access resolution evidence. 064 * 065 * @see ResolutionEvidence 066 */ 067 private final ResolutionEvidence evidenceDetail; 068 069 /** 070 * Override table for traced columns. 071 * When set, getFinalTable() returns this instead of namespace's table. 072 */ 073 private final TTable overrideTable; 074 075 /** 076 * Candidate tables for ambiguous columns. 077 * When a column could come from multiple tables (e.g., SELECT * FROM t1, t2), 078 * this list contains all possible source tables so end users can access them. 079 */ 080 private final List<TTable> candidateTables; 081 082 /** 083 * Field path for deep/record field access (e.g., struct.field.subfield). 084 * 085 * <p>When a column reference includes field access beyond the base column, 086 * this captures the field path. For example, in {@code customer.address.city}, 087 * if base column is {@code customer}, fieldPath contains {@code ["address", "city"]}.</p> 088 * 089 * <p>This field is null or empty for regular column references without field access.</p> 090 * 091 * @see FieldPath 092 */ 093 private final FieldPath fieldPath; 094 095 public ColumnSource(INamespace sourceNamespace, 096 String exposedName, 097 TParseTreeNode definitionNode, 098 double confidence, 099 String evidence) { 100 this(sourceNamespace, exposedName, definitionNode, confidence, evidence, null, null); 101 } 102 103 public ColumnSource(INamespace sourceNamespace, 104 String exposedName, 105 TParseTreeNode definitionNode, 106 double confidence, 107 String evidence, 108 TTable overrideTable) { 109 this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, null); 110 } 111 112 public ColumnSource(INamespace sourceNamespace, 113 String exposedName, 114 TParseTreeNode definitionNode, 115 double confidence, 116 String evidence, 117 TTable overrideTable, 118 List<TTable> candidateTables) { 119 this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, candidateTables, null, null); 120 } 121 122 /** 123 * Full constructor with all fields including ResolutionEvidence. 124 */ 125 public ColumnSource(INamespace sourceNamespace, 126 String exposedName, 127 TParseTreeNode definitionNode, 128 double confidence, 129 String evidence, 130 TTable overrideTable, 131 List<TTable> candidateTables, 132 ResolutionEvidence evidenceDetail) { 133 this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, candidateTables, evidenceDetail, null); 134 } 135 136 /** 137 * Full constructor with all fields including ResolutionEvidence and FieldPath. 138 * 139 * @param sourceNamespace The namespace where this column is exposed 140 * @param exposedName The name by which this column is exposed 141 * @param definitionNode The AST node where this column is defined 142 * @param confidence Confidence score [0.0, 1.0] 143 * @param evidence Evidence string for this resolution 144 * @param overrideTable Override table for traced columns 145 * @param candidateTables Candidate tables for ambiguous columns 146 * @param evidenceDetail Structured evidence detail 147 * @param fieldPath Field path for deep/record field access 148 */ 149 public ColumnSource(INamespace sourceNamespace, 150 String exposedName, 151 TParseTreeNode definitionNode, 152 double confidence, 153 String evidence, 154 TTable overrideTable, 155 List<TTable> candidateTables, 156 ResolutionEvidence evidenceDetail, 157 FieldPath fieldPath) { 158 this.sourceNamespace = sourceNamespace; 159 this.exposedName = exposedName; 160 this.definitionNode = definitionNode; 161 this.definitionLocation = definitionNode != null 162 ? new SourceLocation(definitionNode) 163 : null; 164 this.confidence = Math.max(0.0, Math.min(1.0, confidence)); 165 this.evidence = evidence; 166 this.overrideTable = overrideTable; 167 this.candidateTables = candidateTables != null ? Collections.unmodifiableList(candidateTables) : null; 168 this.fieldPath = fieldPath; 169 // If evidenceDetail not provided, create from legacy evidence 170 if (evidenceDetail != null) { 171 this.evidenceDetail = evidenceDetail; 172 } else if (evidence != null) { 173 this.evidenceDetail = ResolutionEvidence.fromLegacyEvidence(evidence, confidence, definitionNode); 174 } else { 175 this.evidenceDetail = null; 176 } 177 } 178 179 /** 180 * Constructor with ResolutionEvidence (preferred for new code). 181 */ 182 public ColumnSource(INamespace sourceNamespace, 183 String exposedName, 184 TParseTreeNode definitionNode, 185 ResolutionEvidence evidenceDetail) { 186 this(sourceNamespace, exposedName, definitionNode, 187 evidenceDetail != null ? evidenceDetail.getWeight() : 1.0, 188 evidenceDetail != null ? evidenceDetail.toLegacyEvidence() : "metadata", 189 null, null, evidenceDetail); 190 } 191 192 /** 193 * Constructor with ResolutionEvidence and override table. 194 */ 195 public ColumnSource(INamespace sourceNamespace, 196 String exposedName, 197 TParseTreeNode definitionNode, 198 ResolutionEvidence evidenceDetail, 199 TTable overrideTable) { 200 this(sourceNamespace, exposedName, definitionNode, 201 evidenceDetail != null ? evidenceDetail.getWeight() : 1.0, 202 evidenceDetail != null ? evidenceDetail.toLegacyEvidence() : "metadata", 203 overrideTable, null, evidenceDetail); 204 } 205 206 /** 207 * Constructor for definite matches (confidence = 1.0) 208 */ 209 public ColumnSource(INamespace sourceNamespace, 210 String exposedName, 211 TParseTreeNode definitionNode) { 212 this(sourceNamespace, exposedName, definitionNode, 1.0, "metadata"); 213 } 214 215 public INamespace getSourceNamespace() { 216 return sourceNamespace; 217 } 218 219 public String getExposedName() { 220 return exposedName; 221 } 222 223 public TParseTreeNode getDefinitionNode() { 224 return definitionNode; 225 } 226 227 public SourceLocation getDefinitionLocation() { 228 return definitionLocation; 229 } 230 231 public double getConfidence() { 232 return confidence; 233 } 234 235 public String getEvidence() { 236 return evidence; 237 } 238 239 /** 240 * Get the structured evidence detail for this resolution. 241 * 242 * <p>This is the preferred way to access resolution evidence as it provides: 243 * <ul> 244 * <li>Type-safe evidence type (enum)</li> 245 * <li>Confidence weight with clear semantics</li> 246 * <li>Source location for traceability</li> 247 * <li>Human-readable messages</li> 248 * </ul> 249 * 250 * @return The structured evidence detail, or null if not available 251 */ 252 public ResolutionEvidence getEvidenceDetail() { 253 return evidenceDetail; 254 } 255 256 /** 257 * Get the evidence type from the structured evidence detail. 258 * Convenience method for common use cases. 259 * 260 * @return The evidence type, or null if no evidence detail 261 */ 262 public EvidenceType getEvidenceType() { 263 return evidenceDetail != null ? evidenceDetail.getType() : null; 264 } 265 266 /** 267 * Check if this resolution has definite evidence (not inferred). 268 * Definite evidence comes from DDL, metadata, or explicit definitions. 269 * 270 * @return true if evidence is definite 271 */ 272 public boolean hasDefiniteEvidence() { 273 if (evidenceDetail != null) { 274 return evidenceDetail.isDefinite(); 275 } 276 // Fallback: check legacy evidence and confidence 277 if (confidence >= 1.0) { 278 return true; 279 } 280 if (evidence != null) { 281 String lower = evidence.toLowerCase(); 282 return lower.contains("metadata") || lower.contains("ddl") || 283 lower.contains("explicit") || lower.contains("insert_column"); 284 } 285 return false; 286 } 287 288 /** 289 * Get the <b>final</b> physical table this column originates from after tracing 290 * through all subqueries and CTEs. 291 * 292 * <h3>Semantic Difference: getFinalTable() vs TObjectName.getSourceTable()</h3> 293 * <ul> 294 * <li><b>getFinalTable()</b> (this method): The final physical table after 295 * recursively tracing through all subqueries and CTEs. Use this for data lineage.</li> 296 * <li><b>TObjectName.getSourceTable()</b>: The immediate source in the current scope. 297 * For a column from a subquery, this points to the subquery's TTable itself.</li> 298 * </ul> 299 * 300 * <h3>Example</h3> 301 * <pre>{@code 302 * SELECT title FROM (SELECT * FROM books) sub 303 * 304 * For the 'title' column in outer SELECT: 305 * - TObjectName.getSourceTable() → TTable for subquery 'sub' (immediate source) 306 * - ColumnSource.getFinalTable() → TTable for 'books' (final physical table) 307 * }</pre> 308 * 309 * <p>For calculated columns in subqueries (expressions like {@code START_DT - x AS alias}), 310 * this returns null because such calculated columns don't originate from a physical 311 * table - they are derived values computed in the subquery.</p> 312 * 313 * <p>For aliased columns in subqueries (e.g., {@code SELECT t.id AS col1 FROM my_table t}), 314 * this traces through the alias to find the physical table, because the data still 315 * originates from the physical table even though the column has been renamed.</p> 316 * 317 * <p>Note: For CTEs, calculated columns ARE the CTE's own columns, so they trace 318 * to the CTE itself (handled by CTENamespace.getFinalTable()).</p> 319 * 320 * @return The physical table, or null if unable to determine or if calculated in subquery 321 * @see gudusoft.gsqlparser.nodes.TObjectName#getSourceTable() 322 */ 323 public TTable getFinalTable() { 324 if (sourceNamespace == null && overrideTable == null) { 325 return null; 326 } 327 328 // For SubqueryNamespace: calculated columns should NOT trace to base table 329 // They are derived values that don't exist in the underlying physical table 330 // Example: SELECT *, expr AS alias FROM table - alias is calculated, not from table 331 // 332 // IMPORTANT: Check BEFORE overrideTable to prevent alias/calculated columns 333 // from being traced to base tables even when overrideTable is explicitly set 334 if (sourceNamespace instanceof SubqueryNamespace && isCalculatedColumn()) { 335 return null; 336 } 337 338 // For CTENamespace: calculated columns ARE the CTE's own columns 339 // They should trace to the CTE itself (referencing table), NOT to underlying base tables 340 // Example: WITH cte AS (SELECT SUM(x) AS total FROM t) SELECT total FROM cte 341 // The 'total' column traces to 'cte', not to 't' 342 if (sourceNamespace instanceof CTENamespace && isCalculatedColumn()) { 343 return ((CTENamespace) sourceNamespace).getReferencingTable(); 344 } 345 346 // For SubqueryNamespace: column aliases - trace through to find the source table. 347 // The alias changes the column name but the data still originates from a physical table. 348 // Example: SELECT t.id AS col1 FROM my_table t - col1's data comes from my_table 349 if (sourceNamespace instanceof SubqueryNamespace && isColumnAlias()) { 350 return traceColumnAliasThroughSubquery((SubqueryNamespace) sourceNamespace); 351 } 352 353 // For CTENamespace: column aliases ARE the CTE's own columns 354 // They should trace to the CTE itself, NOT to underlying base tables 355 // Example: WITH cte AS (SELECT x AS y FROM t) SELECT y FROM cte 356 // The 'y' column traces to 'cte', not to 't' 357 if (sourceNamespace instanceof CTENamespace && isColumnAlias()) { 358 return ((CTENamespace) sourceNamespace).getReferencingTable(); 359 } 360 361 // For CTENamespace: explicit column names ARE the CTE's own columns 362 // They should trace to the CTE itself, NOT to underlying base tables 363 // Example: WITH cte(c1, c2) AS (SELECT id, name FROM t) SELECT c1 FROM cte 364 // The 'c1' column traces to 'cte', not to 't' (because 'c1' doesn't exist in 't') 365 if (sourceNamespace instanceof CTENamespace && isCTEExplicitColumn()) { 366 return ((CTENamespace) sourceNamespace).getReferencingTable(); 367 } 368 369 // For SubqueryNamespace: passthrough columns that reference calculated columns should NOT trace 370 // IMPORTANT: This check must come BEFORE isPassthroughToAlias() because 371 // isPassthroughToAlias() returns true for BOTH alias and calculated passthroughs. 372 // Example: SELECT kko_lfz_9 FROM (SELECT CASE...END AS kko_lfz_9 FROM t) subq 373 // The outer kko_lfz_9 references a calculated column in the subquery 374 if (sourceNamespace instanceof SubqueryNamespace && isPassthroughToCalculatedInSubquery()) { 375 return null; 376 } 377 378 // For SubqueryNamespace: passthrough columns that reference aliases - trace through 379 // to find the source table. The data still originates from a physical table. 380 // Example: SELECT stat_typ FROM (SELECT stat_typ = stellplatz_typ FROM t) AS b 381 // The stat_typ in outer query references b.stat_typ → traces to t 382 if (sourceNamespace instanceof SubqueryNamespace && isPassthroughToAlias()) { 383 return traceColumnAliasThroughSubquery((SubqueryNamespace) sourceNamespace); 384 } 385 386 // For CTENamespace: passthrough columns that reference calculated subquery columns should NOT trace 387 // Example: WITH DataCTE AS (SELECT subq.calc_col FROM (SELECT CASE...END AS calc_col FROM t) subq) 388 // The CTE column calc_col references a calculated column in the subquery 389 if (sourceNamespace instanceof CTENamespace && isPassthroughToCalculatedInCTE()) { 390 return null; 391 } 392 393 // For CTE explicit column + star pattern: c1/c2/c3 trace to the star, NOT through the star 394 // Example: WITH cte(c1, c2, c3) AS (SELECT * FROM Employees) 395 // Without metadata, c1 traces to Employees.* (the star), not Employees.c1 (which doesn't exist) 396 // The evidence "cte_explicit_column_via_star" indicates this pattern 397 if ("cte_explicit_column_via_star".equals(evidence)) { 398 return null; 399 } 400 401 // If an override table is set (e.g., for traced columns), use it 402 if (overrideTable != null) { 403 return overrideTable; 404 } 405 406 if (sourceNamespace == null) { 407 return null; 408 } 409 410 // For UnionNamespace: UNION columns don't belong to any specific physical table. 411 // They're a combination of multiple branches. UnionNamespace.getFinalTable() returns 412 // the first branch's table which is incorrect for tracking column origins. 413 if (sourceNamespace instanceof UnionNamespace) { 414 return null; 415 } 416 417 // For SubqueryNamespace without override table: if the subquery has multiple tables 418 // AND no qualified star to identify the source, we can't determine which table 419 // the column comes from. Returning the first table would be incorrect. 420 // Example: FROM CDS_H_PARTNER PAR, (SELECT kategorie ... FROM CDS_H_KUNDEN_OBJEKT) subq 421 // But if there's a qualified star like "ta.*", that identifies the source table. 422 // 423 // IMPORTANT: For Teradata, implicit lateral derived tables (auto-added tables when 424 // a column references an undeclared table in WHERE clause) should be excluded from 425 // the multiple-table count. These are syntactic sugar and shouldn't affect column 426 // resolution to the actual source table. 427 if (sourceNamespace instanceof SubqueryNamespace && overrideTable == null) { 428 SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace; 429 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = subNs.getSubquery(); 430 if (subquery != null && subquery.tables != null) { 431 // Count only real tables (excluding implicit lateral derived tables) 432 int realTableCount = countRealTables(subquery.tables); 433 if (realTableCount > 1) { 434 // Check if there's a qualified star that can identify the source 435 if (!hasQualifiedStar(subquery)) { 436 return null; 437 } 438 } 439 } 440 } 441 442 // For CTENamespace with multiple tables (e.g., JOIN): trace the specific column 443 // to its correct source table using the definitionNode 444 // Example: WITH cte AS (SELECT m.album_id, b.band_name FROM albums m JOIN bands b ...) 445 // When tracing 'band_name', we need to find it comes from 'b' (bands), not 'm' (albums) 446 if (sourceNamespace instanceof CTENamespace) { 447 CTENamespace cteNs = (CTENamespace) sourceNamespace; 448 TTable tracedTable = null; 449 450 if (definitionNode instanceof TResultColumn) { 451 // Direct case: definitionNode is a TResultColumn from the CTE's SELECT list 452 tracedTable = traceColumnThroughCTE(cteNs, (TResultColumn) definitionNode); 453 } else { 454 // Indirect case: The column might be traced through a star column 455 // Try to find the column by name in the CTE chain 456 tracedTable = traceColumnByNameThroughCTE(cteNs, exposedName); 457 } 458 459 if (tracedTable != null) { 460 return tracedTable; 461 } 462 463 // For CTEs with multiple tables, if tracing failed (unqualified column), 464 // return null instead of the first table to avoid incorrect lineage. 465 // Example: WITH cte AS (SELECT musicians.id, musician_name, music_bands.band_name 466 // FROM musicians JOIN ... JOIN music_bands) 467 // The unqualified 'musician_name' cannot be traced to any specific table 468 // without metadata, so we should NOT guess and pick the first table. 469 TSelectSqlStatement cteSelect = cteNs.getSelectStatement(); 470 if (cteSelect != null && cteSelect.tables != null) { 471 int tableCount = countRealTables(cteSelect.tables); 472 if (tableCount > 1) { 473 // Cannot determine which table - don't guess 474 return null; 475 } 476 } 477 } 478 479 return sourceNamespace.getFinalTable(); 480 } 481 482 /** 483 * Get the original column name in the physical table when this column is an alias. 484 * 485 * <p>When a column is aliased in a subquery (e.g., {@code SELECT t.id AS col1}), 486 * the exposed name is {@code col1} but the original column in the physical table 487 * is {@code id}. This method returns {@code id} so callers can pair the correct 488 * column name with the physical table returned by {@link #getFinalTable()}.</p> 489 * 490 * <p>For multi-level aliases (e.g., {@code SELECT ADCS_MSISDN FROM (SELECT MSISDN AS ADCS_MSISDN FROM t) sub}), 491 * this recursively traces through all levels to find the original column name ({@code MSISDN}).</p> 492 * 493 * @return The original column name if this is an alias, or null if not an alias 494 * or unable to determine 495 */ 496 public String getFinalColumnName() { 497 return getFinalColumnNameInternal(0); 498 } 499 500 /** 501 * Internal recursive implementation of getFinalColumnName with depth limit. 502 */ 503 private String getFinalColumnNameInternal(int depth) { 504 if (depth > 10) return null; // safety limit for deeply nested aliases 505 506 if (!isColumnAlias() && !isPassthroughToAlias()) { 507 return null; 508 } 509 510 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 511 return null; 512 } 513 514 TResultColumn rc = (TResultColumn) definitionNode; 515 TExpression expr = rc.getExpr(); 516 if (expr == null) { 517 return null; 518 } 519 520 TObjectName colRef = null; 521 if (expr.getExpressionType() == EExpressionType.simple_object_name_t) { 522 colRef = expr.getObjectOperand(); 523 } else if (expr.getExpressionType() == EExpressionType.sqlserver_proprietary_column_alias_t) { 524 TExpression rightExpr = expr.getRightOperand(); 525 if (rightExpr != null && rightExpr.getExpressionType() == EExpressionType.simple_object_name_t) { 526 colRef = rightExpr.getObjectOperand(); 527 } 528 } 529 530 if (colRef != null) { 531 // Check if this inner column reference itself has a ColumnSource that's also an alias. 532 // If so, recursively trace to get the deepest original column name. 533 ColumnSource innerSource = colRef.getColumnSource(); 534 if (innerSource != null && (innerSource.isColumnAlias() || innerSource.isPassthroughToAlias())) { 535 String deeperName = innerSource.getFinalColumnNameInternal(depth + 1); 536 if (deeperName != null) { 537 return deeperName; 538 } 539 } 540 return colRef.getColumnNameOnly(); 541 } 542 return null; 543 } 544 545 /** 546 * Trace a column by name through a CTE to find its correct source table. 547 * This handles the case when the column is traced through star columns 548 * and we don't have the direct TResultColumn definition. 549 * 550 * @param cteNs The CTENamespace to trace through 551 * @param columnName The name of the column to find 552 * @return The correct source table, or null if unable to determine 553 */ 554 private TTable traceColumnByNameThroughCTE(CTENamespace cteNs, String columnName) { 555 if (columnName == null || columnName.isEmpty()) { 556 return null; 557 } 558 559 // Get the CTE's SELECT statement 560 TSelectSqlStatement cteSelect = cteNs.getSelectStatement(); 561 if (cteSelect == null) { 562 return null; 563 } 564 565 // First, check if this CTE has explicit columns matching the name 566 TTable result = findColumnInSelectList(cteSelect, columnName); 567 if (result != null) { 568 return result; 569 } 570 571 // If not found directly, check if this CTE uses SELECT * from another CTE 572 if (cteSelect.tables != null) { 573 for (int i = 0; i < cteSelect.tables.size(); i++) { 574 TTable table = cteSelect.tables.getTable(i); 575 if (table == null) continue; 576 577 // If it references another CTE, trace through it 578 if (table.isCTEName() && table.getCTE() != null) { 579 gudusoft.gsqlparser.nodes.TCTE underlyingCte = table.getCTE(); 580 TSelectSqlStatement underlyingSelect = underlyingCte.getSubquery(); 581 if (underlyingSelect != null) { 582 result = findColumnInSelectList(underlyingSelect, columnName); 583 if (result != null) { 584 return result; 585 } 586 } 587 } 588 } 589 } 590 591 return null; 592 } 593 594 /** 595 * Trace a column through a CTE to find its correct source table. 596 * This handles CTEs with JOINs where columns come from different tables, 597 * including CTEs with star columns that reference other CTEs. 598 * 599 * @param cteNs The CTENamespace 600 * @param resultColumn The TResultColumn from the CTE's SELECT list 601 * @return The correct source table, or null if unable to determine 602 */ 603 private TTable traceColumnThroughCTE(CTENamespace cteNs, TResultColumn resultColumn) { 604 if (resultColumn == null || resultColumn.getExpr() == null) { 605 return null; 606 } 607 608 TExpression expr = resultColumn.getExpr(); 609 610 // Check if the expression is a star column (e.g., SELECT * FROM other_cte) 611 // In this case, we need to trace through to the underlying CTE 612 if (expr.getExpressionType() == EExpressionType.simple_object_name_t) { 613 TObjectName colRef = expr.getObjectOperand(); 614 if (colRef != null && "*".equals(colRef.getColumnNameOnly())) { 615 // This is a star column - trace through to find the actual column 616 return traceColumnThroughStarInCTE(cteNs, exposedName); 617 } 618 } 619 620 // Check if the expression is a simple column reference 621 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 622 return null; 623 } 624 625 TObjectName colRef = expr.getObjectOperand(); 626 if (colRef == null) { 627 return null; 628 } 629 630 // Check if the column has a table qualifier (e.g., "b.band_name") 631 String tableQualifier = colRef.getTableString(); 632 if (tableQualifier == null || tableQualifier.isEmpty()) { 633 // No qualifier - can't determine which table 634 return null; 635 } 636 637 // Get the CTE's subquery to find the table with matching alias 638 TSelectSqlStatement cteSubquery = cteNs.getSelectStatement(); 639 if (cteSubquery == null || cteSubquery.tables == null) { 640 return null; 641 } 642 643 // Search for the table with matching alias or name 644 for (int i = 0; i < cteSubquery.tables.size(); i++) { 645 TTable table = cteSubquery.tables.getTable(i); 646 if (table == null) continue; 647 648 // Check alias match 649 String alias = table.getAliasName(); 650 if (alias != null && alias.equalsIgnoreCase(tableQualifier)) { 651 // Found the table - now trace to its final physical table if needed 652 return traceToPhysicalTable(table); 653 } 654 655 // Check table name match (for unaliased tables) 656 String tableName = table.getTableName() != null ? table.getTableName().toString() : null; 657 if (tableName != null && tableName.equalsIgnoreCase(tableQualifier)) { 658 return traceToPhysicalTable(table); 659 } 660 } 661 662 return null; 663 } 664 665 /** 666 * Trace a specific column through a CTE that uses SELECT *. 667 * This finds the underlying CTE that defines the column and traces it to the correct table. 668 * 669 * @param cteNs The CTE namespace with SELECT * 670 * @param columnName The name of the column to trace 671 * @return The correct source table, or null if unable to determine 672 */ 673 private TTable traceColumnThroughStarInCTE(CTENamespace cteNs, String columnName) { 674 if (columnName == null || columnName.isEmpty()) { 675 return null; 676 } 677 678 TSelectSqlStatement cteSubquery = cteNs.getSelectStatement(); 679 if (cteSubquery == null || cteSubquery.tables == null) { 680 return null; 681 } 682 683 // Find the underlying CTE or table that the star column references 684 for (int i = 0; i < cteSubquery.tables.size(); i++) { 685 TTable table = cteSubquery.tables.getTable(i); 686 if (table == null) continue; 687 688 // If it's a CTE reference, look for the column in that CTE 689 if (table.isCTEName() && table.getCTE() != null) { 690 gudusoft.gsqlparser.nodes.TCTE underlyingCte = table.getCTE(); 691 TSelectSqlStatement underlyingSubquery = underlyingCte.getSubquery(); 692 if (underlyingSubquery != null) { 693 // Look for the column in the underlying CTE's SELECT list 694 TTable tracedTable = findColumnInSelectList(underlyingSubquery, columnName); 695 if (tracedTable != null) { 696 return tracedTable; 697 } 698 } 699 } 700 } 701 702 return null; 703 } 704 705 /** 706 * Find a column by name in a SELECT list and trace it to its source table. 707 * 708 * @param selectStmt The SELECT statement to search 709 * @param columnName The column name to find 710 * @return The source table for the column, or null if not found 711 */ 712 private TTable findColumnInSelectList(TSelectSqlStatement selectStmt, String columnName) { 713 if (selectStmt == null || selectStmt.getResultColumnList() == null) { 714 return null; 715 } 716 717 gudusoft.gsqlparser.nodes.TResultColumnList resultList = selectStmt.getResultColumnList(); 718 for (int i = 0; i < resultList.size(); i++) { 719 TResultColumn rc = resultList.getResultColumn(i); 720 if (rc == null) continue; 721 722 // Get the exposed name (alias or column name) 723 String exposedColName = rc.getAliasClause() != null 724 ? rc.getAliasClause().toString() 725 : (rc.getExpr() != null && rc.getExpr().getObjectOperand() != null 726 ? rc.getExpr().getObjectOperand().getColumnNameOnly() 727 : null); 728 729 if (exposedColName != null && exposedColName.equalsIgnoreCase(columnName)) { 730 // Found the column - trace it to its source table 731 TExpression expr = rc.getExpr(); 732 if (expr != null && expr.getExpressionType() == EExpressionType.simple_object_name_t) { 733 TObjectName colRef = expr.getObjectOperand(); 734 if (colRef != null) { 735 String tableQualifier = colRef.getTableString(); 736 if (tableQualifier != null && !tableQualifier.isEmpty()) { 737 // Find the table with this qualifier in the FROM clause 738 if (selectStmt.tables != null) { 739 for (int j = 0; j < selectStmt.tables.size(); j++) { 740 TTable table = selectStmt.tables.getTable(j); 741 if (table == null) continue; 742 743 String alias = table.getAliasName(); 744 if (alias != null && alias.equalsIgnoreCase(tableQualifier)) { 745 return traceToPhysicalTable(table); 746 } 747 748 String tableName = table.getTableName() != null 749 ? table.getTableName().toString() : null; 750 if (tableName != null && tableName.equalsIgnoreCase(tableQualifier)) { 751 return traceToPhysicalTable(table); 752 } 753 } 754 } 755 } 756 } 757 } 758 } 759 } 760 761 return null; 762 } 763 764 /** 765 * Trace a table to its underlying physical table. 766 * Handles CTEs, subqueries, and JOINs. 767 */ 768 private TTable traceToPhysicalTable(TTable table) { 769 if (table == null) { 770 return null; 771 } 772 773 // If it's already a physical table, return it 774 if (table.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !table.isCTEName()) { 775 return table; 776 } 777 778 // If it's a CTE reference, trace through the CTE 779 if (table.isCTEName() && table.getCTE() != null) { 780 // Use a simple approach - get the first physical table from the CTE 781 // This could be enhanced to trace specific columns through nested CTEs 782 gudusoft.gsqlparser.nodes.TCTE nestedCte = table.getCTE(); 783 if (nestedCte.getSubquery() != null && nestedCte.getSubquery().tables != null) { 784 for (int i = 0; i < nestedCte.getSubquery().tables.size(); i++) { 785 TTable nestedTable = nestedCte.getSubquery().tables.getTable(i); 786 TTable physical = traceToPhysicalTable(nestedTable); 787 if (physical != null) { 788 return physical; 789 } 790 } 791 } 792 } 793 794 // If it's a subquery, trace through it 795 if (table.getSubquery() != null) { 796 SubqueryNamespace nestedNs = new SubqueryNamespace( 797 table.getSubquery(), 798 table.getAliasName(), 799 null // nameMatcher not needed for simple tracing 800 ); 801 return nestedNs.getFinalTable(); 802 } 803 804 return null; 805 } 806 807 /** 808 * Trace a column alias through a subquery to find its source table. 809 * When a column is aliased (e.g., SELECT t.id AS col1 FROM my_table t), 810 * the alias changes the column name but the data still comes from the 811 * underlying table. This method traces through to find that table. 812 * 813 * <p>Used by {@link #getTracedFinalTable()} to provide alias-aware lineage 814 * tracing. Handles both qualified (t.id AS col1) and unqualified (id AS col1) 815 * column references, as well as SQL Server proprietary alias syntax.</p> 816 * 817 * @param subNs The SubqueryNamespace containing the aliased column 818 * @return The physical table the column traces to, or null if undetermined 819 */ 820 private TTable traceColumnAliasThroughSubquery(SubqueryNamespace subNs) { 821 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 822 return null; 823 } 824 825 TResultColumn rc = (TResultColumn) definitionNode; 826 TExpression expr = rc.getExpr(); 827 if (expr == null) { 828 return null; 829 } 830 831 TObjectName colRef = null; 832 833 if (expr.getExpressionType() == EExpressionType.simple_object_name_t) { 834 // Standard: SELECT col AS alias 835 colRef = expr.getObjectOperand(); 836 } else if (expr.getExpressionType() == EExpressionType.sqlserver_proprietary_column_alias_t) { 837 // SQL Server: SELECT alias = col 838 TExpression rightExpr = expr.getRightOperand(); 839 if (rightExpr != null && rightExpr.getExpressionType() == EExpressionType.simple_object_name_t) { 840 colRef = rightExpr.getObjectOperand(); 841 } 842 } 843 844 if (colRef == null) { 845 return null; 846 } 847 848 // Use the resolved sourceTable from the inner column reference 849 TTable sourceTable = colRef.getSourceTable(); 850 if (sourceTable != null) { 851 return traceToPhysicalTable(sourceTable); 852 } 853 854 // Fallback: if no sourceTable resolved, try to find by table qualifier 855 String tableQualifier = colRef.getTableString(); 856 if (tableQualifier != null && !tableQualifier.isEmpty()) { 857 TSelectSqlStatement subquery = subNs.getSubquery(); 858 if (subquery != null && subquery.tables != null) { 859 for (int i = 0; i < subquery.tables.size(); i++) { 860 TTable table = subquery.tables.getTable(i); 861 if (table == null) continue; 862 863 String alias = table.getAliasName(); 864 if (alias != null && alias.equalsIgnoreCase(tableQualifier)) { 865 return traceToPhysicalTable(table); 866 } 867 868 String tableName = table.getTableName() != null ? table.getTableName().toString() : null; 869 if (tableName != null && tableName.equalsIgnoreCase(tableQualifier)) { 870 return traceToPhysicalTable(table); 871 } 872 } 873 } 874 } 875 876 return null; 877 } 878 879 /** 880 * Get all physical tables that this column might originate from. 881 * 882 * <p>For columns from UNION queries, this returns tables from ALL branches, 883 * not just the first one. This is essential for proper lineage tracking 884 * where a column like {@code actor_id} in a UNION query should be linked 885 * to {@code actor.actor_id}, {@code actor2.actor_id}, {@code actor3.actor_id}.</p> 886 * 887 * <p>For regular single-table sources, this returns a single-element list 888 * with the same table as {@link #getFinalTable()}.</p> 889 * 890 * @return List of all physical tables, or empty list if unable to determine 891 */ 892 public java.util.List<TTable> getAllFinalTables() { 893 // If this ColumnSource has explicit candidateTables set (e.g., from UNION inference), 894 // use those instead of delegating to namespace. This is critical for UNION queries 895 // where only branches with SELECT * should contribute candidate tables for inferred columns. 896 // An EMPTY list means "no matching tables" - return it as-is without delegating. 897 // A NULL means "not determined" - delegate to namespace. 898 if (candidateTables != null) { 899 return candidateTables; 900 } 901 902 if (sourceNamespace == null) { 903 if (overrideTable != null) { 904 return java.util.Collections.singletonList(overrideTable); 905 } 906 return java.util.Collections.emptyList(); 907 } 908 909 // For calculated columns and aliases in SubqueryNamespace, don't trace 910 if (sourceNamespace instanceof SubqueryNamespace) { 911 if (isCalculatedColumn() || isColumnAlias()) { 912 return java.util.Collections.emptyList(); 913 } 914 } 915 916 // For CTENamespace calculated/alias/explicit columns, trace to CTE itself 917 if (sourceNamespace instanceof CTENamespace) { 918 if (isCalculatedColumn() || isColumnAlias() || isCTEExplicitColumn()) { 919 TTable cteTable = ((CTENamespace) sourceNamespace).getReferencingTable(); 920 if (cteTable != null) { 921 return java.util.Collections.singletonList(cteTable); 922 } 923 return java.util.Collections.emptyList(); 924 } 925 } 926 927 // Delegate to namespace - handles UNION queries via UnionNamespace.getAllFinalTables() 928 return sourceNamespace.getAllFinalTables(); 929 } 930 931 /** 932 * Check if this column is a passthrough reference to an underlying alias. 933 * 934 * <p>A passthrough column is a simple column reference in a subquery that 935 * references another column from its FROM clause. If that underlying column 936 * is an alias, then this passthrough should not trace to the base table.</p> 937 * 938 * <p>Example: In {@code SELECT stat_typ FROM (SELECT stat_typ = col FROM t) AS b}, 939 * the outer {@code stat_typ} is a passthrough to {@code b.stat_typ}, which is an alias.</p> 940 * 941 * @return true if this is a passthrough to an alias 942 */ 943 private boolean isPassthroughToAlias() { 944 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 945 return false; 946 } 947 948 TResultColumn rc = (TResultColumn) definitionNode; 949 TExpression expr = rc.getExpr(); 950 if (expr == null) { 951 return false; 952 } 953 954 // Only check simple column references (passthroughs) 955 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 956 return false; 957 } 958 959 // If this column itself has an alias that differs, it's already handled by isColumnAlias() 960 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 961 return false; 962 } 963 964 // Get the column name being referenced 965 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 966 if (objName == null) { 967 return false; 968 } 969 String columnName = objName.getColumnNameOnly(); 970 if (columnName == null || columnName.isEmpty()) { 971 return false; 972 } 973 974 // Resolve this column in the subquery's FROM scope to find the underlying ColumnSource 975 if (sourceNamespace instanceof SubqueryNamespace) { 976 SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace; 977 ColumnSource underlyingSource = subNs.resolveColumnInFromScope(columnName); 978 if (underlyingSource != null) { 979 // Check if the underlying column is an alias or calculated 980 if (underlyingSource.isColumnAlias() || underlyingSource.isCalculatedColumn()) { 981 return true; 982 } 983 // Recursively check if it's a passthrough to alias 984 if (underlyingSource.isPassthroughToAlias()) { 985 return true; 986 } 987 } 988 } 989 990 return false; 991 } 992 993 /** 994 * Check if this subquery column is a passthrough reference to a calculated column. 995 * 996 * <p>A subquery column is a passthrough to calculated if:</p> 997 * <ol> 998 * <li>The column definition is a simple column reference (e.g., kko_lfz_9)</li> 999 * <li>The referenced column in the FROM scope is calculated (CASE, function, etc.)</li> 1000 * </ol> 1001 * 1002 * <p>Example:</p> 1003 * <pre> 1004 * SELECT kko_lfz_9 AS KKO_LFZ_9 1005 * FROM (SELECT CASE WHEN... END AS kko_lfz_9 FROM t) subq 1006 * </pre> 1007 * <p>Here, kko_lfz_9 in the outer query is a passthrough to a calculated column in subq.</p> 1008 * 1009 * <p>This differs from {@link #isPassthroughToAlias()} which skips columns with aliases. 1010 * Here we check even aliased passthroughs to see if they reference calculated columns.</p> 1011 * 1012 * @return true if this is a passthrough to a calculated column in a subquery 1013 */ 1014 private boolean isPassthroughToCalculatedInSubquery() { 1015 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 1016 return false; 1017 } 1018 1019 TResultColumn rc = (TResultColumn) definitionNode; 1020 TExpression expr = rc.getExpr(); 1021 if (expr == null) { 1022 return false; 1023 } 1024 1025 // Only check simple column references (passthroughs) 1026 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1027 return false; 1028 } 1029 1030 // Get the column name being referenced 1031 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 1032 if (objName == null) { 1033 return false; 1034 } 1035 String columnName = objName.getColumnNameOnly(); 1036 if (columnName == null || columnName.isEmpty()) { 1037 return false; 1038 } 1039 1040 // Resolve this column in the subquery's FROM scope to find the underlying ColumnSource 1041 if (sourceNamespace instanceof SubqueryNamespace) { 1042 SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace; 1043 ColumnSource underlyingSource = subNs.resolveColumnInFromScope(columnName); 1044 if (underlyingSource != null) { 1045 // Check if the underlying column is calculated 1046 if (underlyingSource.isCalculatedColumn()) { 1047 return true; 1048 } 1049 // Recursively check if it's a passthrough to calculated 1050 if (underlyingSource.isPassthroughToCalculatedInSubquery()) { 1051 return true; 1052 } 1053 } 1054 } 1055 1056 return false; 1057 } 1058 1059 /** 1060 * Check if this CTE column is a passthrough reference to a calculated column in a subquery or nested CTE. 1061 * 1062 * <p>A CTE column is a passthrough to calculated if:</p> 1063 * <ol> 1064 * <li>The column definition is a simple qualified column reference (e.g., subq.calc_col or cte.calc_col)</li> 1065 * <li>The qualifier refers to a subquery or CTE in the CTE's body</li> 1066 * <li>The referenced column in that subquery/CTE is calculated (CASE, function, etc.)</li> 1067 * </ol> 1068 * 1069 * <p>Example with subquery:</p> 1070 * <pre> 1071 * WITH DataCTE AS ( 1072 * SELECT ErrorCountsCTE.ErrorSeverityCategory -- passthrough 1073 * FROM (SELECT CASE...END AS ErrorSeverityCategory FROM t) ErrorCountsCTE 1074 * ) 1075 * </pre> 1076 * 1077 * <p>Example with nested CTE:</p> 1078 * <pre> 1079 * WITH attendance_summary AS ( 1080 * SELECT date_trunc('month', attendance_date) as month FROM attendance 1081 * ) 1082 * WITH outer_cte AS ( 1083 * SELECT a.month FROM attendance_summary a -- passthrough to calculated in nested CTE 1084 * ) 1085 * </pre> 1086 * 1087 * @return true if this is a passthrough to a calculated column in a CTE 1088 */ 1089 private boolean isPassthroughToCalculatedInCTE() { 1090 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 1091 return false; 1092 } 1093 1094 TResultColumn rc = (TResultColumn) definitionNode; 1095 TExpression expr = rc.getExpr(); 1096 if (expr == null) { 1097 return false; 1098 } 1099 1100 // Only check simple qualified column references (passthroughs like subq.column) 1101 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1102 return false; 1103 } 1104 1105 // Get the column reference 1106 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 1107 if (objName == null) { 1108 return false; 1109 } 1110 1111 // Must have a table qualifier (e.g., "ErrorCountsCTE" in "ErrorCountsCTE.ErrorSeverityCategory") 1112 String tableQualifier = objName.getTableString(); 1113 if (tableQualifier == null || tableQualifier.isEmpty()) { 1114 return false; 1115 } 1116 1117 String columnName = objName.getColumnNameOnly(); 1118 if (columnName == null || columnName.isEmpty()) { 1119 return false; 1120 } 1121 1122 // Get the CTE's subquery to find the referenced subquery alias 1123 if (!(sourceNamespace instanceof CTENamespace)) { 1124 return false; 1125 } 1126 1127 CTENamespace cteNs = (CTENamespace) sourceNamespace; 1128 gudusoft.gsqlparser.nodes.TCTE cte = cteNs.getCTE(); 1129 if (cte == null || cte.getSubquery() == null) { 1130 return false; 1131 } 1132 1133 // Find the subquery/table with this alias in the CTE's body 1134 gudusoft.gsqlparser.stmt.TSelectSqlStatement cteBody = cte.getSubquery(); 1135 TTable referencedTable = findTableByAlias(cteBody, tableQualifier); 1136 if (referencedTable == null) { 1137 return false; 1138 } 1139 1140 // Case 1: Referenced table is a subquery 1141 if (referencedTable.getSubquery() != null) { 1142 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = referencedTable.getSubquery(); 1143 return isCalculatedColumnInSelect(subquery, columnName); 1144 } 1145 1146 // Case 2: Referenced table is a CTE reference 1147 if (referencedTable.isCTEName() && referencedTable.getCTE() != null) { 1148 gudusoft.gsqlparser.nodes.TCTE referencedCTE = referencedTable.getCTE(); 1149 if (referencedCTE.getSubquery() != null) { 1150 return isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName); 1151 } 1152 } 1153 1154 return false; 1155 } 1156 1157 /** 1158 * Find a table in a SELECT statement by its alias. 1159 */ 1160 private TTable findTableByAlias(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String alias) { 1161 if (select == null || select.tables == null || alias == null) { 1162 return null; 1163 } 1164 1165 for (int i = 0; i < select.tables.size(); i++) { 1166 TTable table = select.tables.getTable(i); 1167 if (table != null) { 1168 String tableAlias = table.getAliasName(); 1169 if (tableAlias != null && tableAlias.equalsIgnoreCase(alias)) { 1170 return table; 1171 } 1172 // Also check table name for non-aliased references 1173 if (tableAlias == null && table.getTableName() != null) { 1174 String tableName = table.getTableName().toString(); 1175 if (tableName != null && tableName.equalsIgnoreCase(alias)) { 1176 return table; 1177 } 1178 } 1179 } 1180 } 1181 return null; 1182 } 1183 1184 /** 1185 * Check if a column in a SELECT statement is calculated (not a simple column reference). 1186 */ 1187 private boolean isCalculatedColumnInSelect(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) { 1188 if (select == null || select.getResultColumnList() == null || columnName == null) { 1189 return false; 1190 } 1191 1192 for (int i = 0; i < select.getResultColumnList().size(); i++) { 1193 TResultColumn rc = select.getResultColumnList().getResultColumn(i); 1194 if (rc == null) continue; 1195 1196 // Get the column name for this result column 1197 String rcName = null; 1198 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1199 rcName = rc.getAliasClause().getAliasName().toString(); 1200 } else if (rc.getExpr() != null && 1201 rc.getExpr().getExpressionType() == EExpressionType.simple_object_name_t && 1202 rc.getExpr().getObjectOperand() != null) { 1203 rcName = rc.getExpr().getObjectOperand().getColumnNameOnly(); 1204 } 1205 1206 if (rcName != null && rcName.equalsIgnoreCase(columnName)) { 1207 // Found the column - check if it's calculated 1208 TExpression expr = rc.getExpr(); 1209 if (expr != null && expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1210 // Non-simple expression = calculated 1211 return true; 1212 } 1213 } 1214 } 1215 return false; 1216 } 1217 1218 /** 1219 * Check if this column source represents a calculated expression. 1220 * 1221 * <p>A column is calculated if its definition is a TResultColumn with 1222 * a non-simple expression (not a direct column reference or star).</p> 1223 * 1224 * <p>For inferred columns (via star expansion), we trace back to the 1225 * source CTE/subquery to check if the original column is calculated.</p> 1226 * 1227 * @return true if this is a calculated column 1228 */ 1229 public boolean isCalculatedColumn() { 1230 if (definitionNode == null) { 1231 // For inferred columns through star expansion, check if the underlying 1232 // column in the source CTE/subquery is calculated 1233 return isInferredFromCalculatedColumn(); 1234 } 1235 1236 if (!(definitionNode instanceof TResultColumn)) { 1237 return false; 1238 } 1239 1240 TResultColumn rc = (TResultColumn) definitionNode; 1241 TExpression expr = rc.getExpr(); 1242 if (expr == null) { 1243 return false; 1244 } 1245 1246 EExpressionType exprType = expr.getExpressionType(); 1247 1248 // Simple column reference - NOT calculated (passthrough) 1249 if (exprType == EExpressionType.simple_object_name_t) { 1250 return false; 1251 } 1252 1253 // Star column - NOT calculated (passthrough) 1254 String colText = rc.toString(); 1255 if (colText != null && colText.endsWith("*")) { 1256 return false; 1257 } 1258 1259 // SQL Server proprietary column alias (col = expr) 1260 if (exprType == EExpressionType.sqlserver_proprietary_column_alias_t) { 1261 if (expr.getRightOperand() != null && 1262 expr.getRightOperand().getExpressionType() == EExpressionType.simple_object_name_t) { 1263 return false; 1264 } 1265 } 1266 1267 // Any other expression type is calculated 1268 return true; 1269 } 1270 1271 /** 1272 * Check if this is an inferred column (via star expansion) that originates from 1273 * a calculated column in the source CTE/subquery. 1274 * 1275 * <p>When a column is resolved through star expansion (e.g., SELECT * FROM CTE), 1276 * the definitionNode is null. We need to trace back to the source namespace 1277 * to check if the original column is calculated.</p> 1278 * 1279 * @return true if this inferred column traces back to a calculated column 1280 */ 1281 private boolean isInferredFromCalculatedColumn() { 1282 // Only check for inferred columns (evidence contains "auto_inferred") 1283 if (evidence == null || !evidence.contains("auto_inferred")) { 1284 return false; 1285 } 1286 1287 // Need the source namespace and column name to trace 1288 if (sourceNamespace == null || exposedName == null) { 1289 return false; 1290 } 1291 1292 // For CTE namespace, check if the column is calculated in the CTE's SELECT list 1293 if (sourceNamespace instanceof CTENamespace) { 1294 CTENamespace cteNs = (CTENamespace) sourceNamespace; 1295 gudusoft.gsqlparser.nodes.TCTE cte = cteNs.getCTE(); 1296 if (cte != null && cte.getSubquery() != null) { 1297 // First check the CTE's direct SELECT list 1298 if (isCalculatedColumnInSelect(cte.getSubquery(), exposedName)) { 1299 return true; 1300 } 1301 1302 // If the CTE has a star column, trace through to referenced CTEs 1303 if (cteNs.hasStarColumn()) { 1304 return isCalculatedInCTEChain(cte.getSubquery(), exposedName); 1305 } 1306 } 1307 } 1308 1309 // For Subquery namespace, check if the column is calculated in the subquery's SELECT list 1310 if (sourceNamespace instanceof SubqueryNamespace) { 1311 SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace; 1312 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = subNs.getSubquery(); 1313 if (subquery != null) { 1314 // First check the subquery's direct SELECT list 1315 if (isCalculatedColumnInSelect(subquery, exposedName)) { 1316 return true; 1317 } 1318 1319 // If the subquery has a star column, trace through to source tables 1320 if (subNs.hasStarColumn()) { 1321 return isCalculatedInSubqueryChain(subquery, exposedName); 1322 } 1323 } 1324 } 1325 1326 return false; 1327 } 1328 1329 /** 1330 * Check if a column is calculated by tracing through CTE references. 1331 * This handles cases like Stage4 -> Stage3 -> Stage2 where the column 1332 * is calculated at some intermediate level. 1333 */ 1334 private boolean isCalculatedInCTEChain(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) { 1335 if (select == null || select.tables == null) { 1336 return false; 1337 } 1338 1339 // Look for CTE references in the FROM clause 1340 for (int i = 0; i < select.tables.size(); i++) { 1341 TTable table = select.tables.getTable(i); 1342 if (table != null && table.isCTEName() && table.getCTE() != null) { 1343 gudusoft.gsqlparser.nodes.TCTE referencedCTE = table.getCTE(); 1344 if (referencedCTE.getSubquery() != null) { 1345 // Check if the column is calculated in this CTE 1346 if (isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName)) { 1347 return true; 1348 } 1349 // Recursively check the CTE chain 1350 if (isCalculatedInCTEChain(referencedCTE.getSubquery(), columnName)) { 1351 return true; 1352 } 1353 } 1354 } 1355 } 1356 return false; 1357 } 1358 1359 /** 1360 * Check if a column is calculated by tracing through subquery references. 1361 */ 1362 private boolean isCalculatedInSubqueryChain(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) { 1363 if (select == null || select.tables == null) { 1364 return false; 1365 } 1366 1367 // Look for subquery tables in the FROM clause 1368 for (int i = 0; i < select.tables.size(); i++) { 1369 TTable table = select.tables.getTable(i); 1370 if (table != null && table.getSubquery() != null) { 1371 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = table.getSubquery(); 1372 // Check if the column is calculated in this subquery 1373 if (isCalculatedColumnInSelect(subquery, columnName)) { 1374 return true; 1375 } 1376 // Recursively check the subquery chain 1377 if (isCalculatedInSubqueryChain(subquery, columnName)) { 1378 return true; 1379 } 1380 } 1381 // Also check CTE references within subqueries 1382 if (table != null && table.isCTEName() && table.getCTE() != null) { 1383 gudusoft.gsqlparser.nodes.TCTE referencedCTE = table.getCTE(); 1384 if (referencedCTE.getSubquery() != null) { 1385 if (isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName)) { 1386 return true; 1387 } 1388 if (isCalculatedInCTEChain(referencedCTE.getSubquery(), columnName)) { 1389 return true; 1390 } 1391 } 1392 } 1393 } 1394 return false; 1395 } 1396 1397 /** 1398 * Check if this column source represents a column alias (renamed column). 1399 * 1400 * <p>A column is an alias if it's a simple column reference in a subquery 1401 * that has been given a different name via AS or NAMED. For example:</p> 1402 * <ul> 1403 * <li>{@code SELECT col AS alias FROM table} - alias is different from col</li> 1404 * <li>{@code SELECT col (NAMED alias) FROM table} - Teradata NAMED syntax</li> 1405 * <li>{@code SELECT alias = col FROM table} - SQL Server proprietary syntax</li> 1406 * </ul> 1407 * 1408 * <p>Column aliases are traced through in {@link #getFinalTable()} to find the 1409 * physical table the data originates from, since the alias only renames the column 1410 * but the data still comes from the physical table.</p> 1411 * 1412 * @return true if this is a column alias with a different name than the original 1413 */ 1414 public boolean isColumnAlias() { 1415 if (definitionNode == null) { 1416 return false; 1417 } 1418 1419 if (!(definitionNode instanceof TResultColumn)) { 1420 return false; 1421 } 1422 1423 TResultColumn rc = (TResultColumn) definitionNode; 1424 TExpression expr = rc.getExpr(); 1425 if (expr == null) { 1426 return false; 1427 } 1428 1429 EExpressionType exprType = expr.getExpressionType(); 1430 1431 // Handle SQL Server proprietary alias syntax: alias = column 1432 // Example: stat_typ = stellplatz_typ 1433 if (exprType == EExpressionType.sqlserver_proprietary_column_alias_t) { 1434 TExpression rightExpr = expr.getRightOperand(); 1435 TExpression leftExpr = expr.getLeftOperand(); 1436 // Only if right side is a simple column reference 1437 if (rightExpr != null && leftExpr != null && 1438 rightExpr.getExpressionType() == EExpressionType.simple_object_name_t) { 1439 gudusoft.gsqlparser.nodes.TObjectName rightObjName = rightExpr.getObjectOperand(); 1440 gudusoft.gsqlparser.nodes.TObjectName leftObjName = leftExpr.getObjectOperand(); 1441 if (rightObjName != null && leftObjName != null) { 1442 String origName = rightObjName.getColumnNameOnly(); 1443 String aliasName = leftObjName.getColumnNameOnly(); 1444 // If alias name differs from original column name, it's an alias 1445 if (origName != null && aliasName != null && 1446 !origName.equalsIgnoreCase(aliasName)) { 1447 return true; 1448 } 1449 } 1450 } 1451 return false; 1452 } 1453 1454 // Standard alias syntax: column AS alias 1455 // Only applies to simple column references 1456 if (exprType != EExpressionType.simple_object_name_t) { 1457 return false; 1458 } 1459 1460 // Check if there's an alias that differs from the column name 1461 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1462 String aliasName = rc.getAliasClause().getAliasName().toString(); 1463 if (aliasName != null && !aliasName.isEmpty()) { 1464 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 1465 if (objName != null) { 1466 String origName = objName.getColumnNameOnly(); 1467 // If alias name differs from original name, it's an alias 1468 if (origName != null && !origName.equalsIgnoreCase(aliasName)) { 1469 return true; 1470 } 1471 } 1472 } 1473 } 1474 1475 return false; 1476 } 1477 1478 /** 1479 * Check if this column is a CTE explicit column with a different name than the underlying column. 1480 * 1481 * <p>A CTE explicit column is one defined in the CTE's column list that maps to a 1482 * different column name in the CTE's SELECT list. For example:</p> 1483 * <pre> 1484 * WITH cte(c1, c2) AS (SELECT id, name FROM users) 1485 * SELECT c1 FROM cte -- c1 maps to 'id', names differ 1486 * </pre> 1487 * 1488 * <p>CTE explicit columns should NOT trace to base tables because the explicit 1489 * column name (c1) doesn't exist as an actual column in the base table (users).</p> 1490 * 1491 * @return true if this is a CTE explicit column with a different name 1492 */ 1493 public boolean isCTEExplicitColumn() { 1494 // Must be from a CTENamespace 1495 if (!(sourceNamespace instanceof CTENamespace)) { 1496 return false; 1497 } 1498 1499 // Check evidence for explicit column marker 1500 if (!"cte_explicit_column".equals(evidence)) { 1501 return false; 1502 } 1503 1504 // Get the underlying column name from the definition node 1505 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 1506 return false; 1507 } 1508 1509 TResultColumn rc = (TResultColumn) definitionNode; 1510 TExpression expr = rc.getExpr(); 1511 if (expr == null) { 1512 return false; 1513 } 1514 1515 // Get the column name from the SELECT list item 1516 String underlyingName = null; 1517 1518 // Check for alias first 1519 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1520 underlyingName = rc.getAliasClause().getAliasName().toString(); 1521 } 1522 // Then check for simple column reference 1523 else if (expr.getExpressionType() == EExpressionType.simple_object_name_t && 1524 expr.getObjectOperand() != null) { 1525 underlyingName = expr.getObjectOperand().getColumnNameOnly(); 1526 } 1527 1528 // If we can't determine the underlying name, assume it's different 1529 // (calculated expressions, etc. are definitely different from explicit column names) 1530 if (underlyingName == null) { 1531 return true; 1532 } 1533 1534 // If the exposed name differs from the underlying column name, it's an explicit column rename 1535 return !exposedName.equalsIgnoreCase(underlyingName); 1536 } 1537 1538 /** 1539 * Get the override table, if set. 1540 */ 1541 public TTable getOverrideTable() { 1542 return overrideTable; 1543 } 1544 1545 /** 1546 * Get the candidate tables for ambiguous columns. 1547 * 1548 * <p>When a column could come from multiple tables (e.g., SELECT * FROM t1, t2), 1549 * this returns all possible source tables. End users can iterate through this 1550 * list to understand all potential sources for the column.</p> 1551 * 1552 * @return List of candidate tables, or empty list if not ambiguous 1553 */ 1554 public List<TTable> getCandidateTables() { 1555 return candidateTables != null ? candidateTables : Collections.emptyList(); 1556 } 1557 1558 /** 1559 * Check if this column has multiple candidate tables (is ambiguous). 1560 * 1561 * @return true if there are multiple candidate tables 1562 */ 1563 public boolean isAmbiguous() { 1564 return candidateTables != null && candidateTables.size() > 1; 1565 } 1566 1567 /** 1568 * Get the field path for deep/record field access. 1569 * 1570 * <p>When a column reference includes field access beyond the base column, 1571 * this returns the field path. For example, in {@code customer.address.city}, 1572 * if base column is {@code customer}, this returns a FieldPath with 1573 * segments {@code ["address", "city"]}.</p> 1574 * 1575 * @return The field path, or null if no field access 1576 */ 1577 public FieldPath getFieldPath() { 1578 return fieldPath; 1579 } 1580 1581 /** 1582 * Check if this column source has a field path (deep/record field access). 1583 * 1584 * @return true if a non-empty field path exists 1585 */ 1586 public boolean hasFieldPath() { 1587 return fieldPath != null && !fieldPath.isEmpty(); 1588 } 1589 1590 /** 1591 * Check if this is a struct field access (has evidence "struct_field_access"). 1592 * 1593 * <p>This is a convenience method for checking if this column source represents 1594 * a struct/record field dereference operation.</p> 1595 * 1596 * @return true if this is a struct field access 1597 */ 1598 public boolean isStructFieldAccess() { 1599 return "struct_field_access".equals(evidence); 1600 } 1601 1602 /** 1603 * Checks if this is a definite resolution (confidence = 1.0) 1604 */ 1605 public boolean isDefinite() { 1606 return confidence >= 1.0; 1607 } 1608 1609 /** 1610 * Checks if this is an inferred resolution (confidence < 1.0) 1611 */ 1612 public boolean isInferred() { 1613 return confidence < 1.0; 1614 } 1615 1616 @Override 1617 public String toString() { 1618 StringBuilder sb = new StringBuilder(); 1619 sb.append(exposedName); 1620 if (sourceNamespace != null) { 1621 sb.append(" from ").append(sourceNamespace.getDisplayName()); 1622 } 1623 if (confidence < 1.0) { 1624 sb.append(String.format(" (confidence: %.2f)", confidence)); 1625 } 1626 return sb.toString(); 1627 } 1628 1629 /** 1630 * Creates a copy with updated confidence and evidence. 1631 * Used when merging or updating inference results. 1632 * 1633 * @deprecated Use {@link #withEvidence(ResolutionEvidence)} instead 1634 */ 1635 public ColumnSource withConfidence(double newConfidence, String newEvidence) { 1636 return new ColumnSource( 1637 this.sourceNamespace, 1638 this.exposedName, 1639 this.definitionNode, 1640 newConfidence, 1641 newEvidence, 1642 this.overrideTable, 1643 this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null, 1644 null, // will create from legacy evidence 1645 this.fieldPath 1646 ); 1647 } 1648 1649 /** 1650 * Creates a copy with updated ResolutionEvidence. 1651 * This is the preferred method for updating evidence in new code. 1652 * 1653 * @param newEvidence The new evidence detail 1654 * @return A new ColumnSource with updated evidence 1655 */ 1656 public ColumnSource withEvidence(ResolutionEvidence newEvidence) { 1657 return new ColumnSource( 1658 this.sourceNamespace, 1659 this.exposedName, 1660 this.definitionNode, 1661 newEvidence != null ? newEvidence.getWeight() : this.confidence, 1662 newEvidence != null ? newEvidence.toLegacyEvidence() : this.evidence, 1663 this.overrideTable, 1664 this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null, 1665 newEvidence, 1666 this.fieldPath 1667 ); 1668 } 1669 1670 /** 1671 * Creates a copy with candidate tables. 1672 * Used when a column could come from multiple tables. 1673 */ 1674 public ColumnSource withCandidateTables(List<TTable> candidates) { 1675 return new ColumnSource( 1676 this.sourceNamespace, 1677 this.exposedName, 1678 this.definitionNode, 1679 this.confidence, 1680 this.evidence, 1681 this.overrideTable, 1682 candidates != null ? new java.util.ArrayList<>(candidates) : null, 1683 this.evidenceDetail, 1684 this.fieldPath 1685 ); 1686 } 1687 1688 /** 1689 * Creates a copy with a field path for deep/record field access. 1690 * 1691 * <p>This method is used when resolving struct/record field access patterns 1692 * like {@code customer.address.city}. The base column is preserved as the 1693 * exposedName, and the field path captures the remaining segments.</p> 1694 * 1695 * @param newFieldPath The field path segments (beyond the base column) 1696 * @return A new ColumnSource with the field path set 1697 */ 1698 public ColumnSource withFieldPath(FieldPath newFieldPath) { 1699 return new ColumnSource( 1700 this.sourceNamespace, 1701 this.exposedName, 1702 this.definitionNode, 1703 this.confidence, 1704 this.evidence, 1705 this.overrideTable, 1706 this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null, 1707 this.evidenceDetail, 1708 newFieldPath 1709 ); 1710 } 1711 1712 /** 1713 * Creates a copy with a field path from a list of segments. 1714 * 1715 * <p>Convenience method for creating a ColumnSource with a field path 1716 * from a list of string segments.</p> 1717 * 1718 * @param segments The field path segments 1719 * @return A new ColumnSource with the field path set 1720 */ 1721 public ColumnSource withFieldPath(List<String> segments) { 1722 return withFieldPath(FieldPath.of(segments)); 1723 } 1724 1725 /** 1726 * Creates a copy with field path and updated evidence. 1727 * 1728 * <p>This method is used when resolving struct field access, combining 1729 * both the field path and the struct_field_access evidence marker.</p> 1730 * 1731 * @param newFieldPath The field path segments 1732 * @param newEvidence The evidence string (e.g., "struct_field_access") 1733 * @return A new ColumnSource with field path and evidence updated 1734 */ 1735 public ColumnSource withFieldPath(FieldPath newFieldPath, String newEvidence) { 1736 return new ColumnSource( 1737 this.sourceNamespace, 1738 this.exposedName, 1739 this.definitionNode, 1740 this.confidence, 1741 newEvidence, 1742 this.overrideTable, 1743 this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null, 1744 null, // will create from legacy evidence 1745 newFieldPath 1746 ); 1747 } 1748 1749 /** 1750 * Count the number of "real" tables in a table list, excluding implicit lateral derived tables. 1751 * 1752 * <p>Teradata supports implicit lateral derived tables, which are auto-added when a column 1753 * references an undeclared table in the WHERE clause. These should not be counted when 1754 * determining if a subquery has multiple tables for column resolution purposes.</p> 1755 * 1756 * @param tables The table list to count 1757 * @return The number of real (non-implicit) tables 1758 */ 1759 private static int countRealTables(gudusoft.gsqlparser.nodes.TTableList tables) { 1760 if (tables == null) { 1761 return 0; 1762 } 1763 int count = 0; 1764 for (int i = 0; i < tables.size(); i++) { 1765 TTable table = tables.getTable(i); 1766 if (table != null && table.getEffectType() != gudusoft.gsqlparser.ETableEffectType.tetImplicitLateralDerivedTable) { 1767 count++; 1768 } 1769 } 1770 return count; 1771 } 1772 1773 /** 1774 * Check if a SELECT statement has a qualified star column (e.g., ta.*, tb.*). 1775 * Qualified stars identify which table columns come from in multi-table subqueries. 1776 */ 1777 private static boolean hasQualifiedStar(gudusoft.gsqlparser.stmt.TSelectSqlStatement select) { 1778 if (select == null || select.getResultColumnList() == null) { 1779 return false; 1780 } 1781 gudusoft.gsqlparser.nodes.TResultColumnList resultCols = select.getResultColumnList(); 1782 for (int i = 0; i < resultCols.size(); i++) { 1783 TResultColumn rc = resultCols.getResultColumn(i); 1784 if (rc != null) { 1785 String colStr = rc.toString().trim(); 1786 // Qualified star has format "alias.*" or "table.*" 1787 if (colStr.endsWith("*") && colStr.contains(".")) { 1788 return true; 1789 } 1790 } 1791 } 1792 return false; 1793 } 1794 1795 /** 1796 * Check if a column exists in a table's DDL definition. 1797 * 1798 * <p>This method checks the table's column definitions (from CREATE TABLE statements 1799 * parsed in the same script) to verify if the column name is defined.</p> 1800 * 1801 * @param table The table to check 1802 * @param columnName The column name to look for 1803 * @return true if the column exists in the table's DDL, false if not found or no DDL available 1804 */ 1805 public static boolean isColumnInTableDdl(TTable table, String columnName) { 1806 if (table == null || columnName == null || columnName.isEmpty()) { 1807 return false; 1808 } 1809 1810 // Check if the table has column definitions (from CREATE TABLE DDL) 1811 gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions(); 1812 if (columnDefs != null && columnDefs.size() > 0) { 1813 for (int i = 0; i < columnDefs.size(); i++) { 1814 gudusoft.gsqlparser.nodes.TColumnDefinition colDef = columnDefs.getColumn(i); 1815 if (colDef != null && colDef.getColumnName() != null) { 1816 String defColName = colDef.getColumnName().toString(); 1817 if (defColName != null && defColName.equalsIgnoreCase(columnName)) { 1818 return true; 1819 } 1820 } 1821 } 1822 // DDL exists but column not found 1823 return false; 1824 } 1825 1826 // No DDL available - return false (cannot verify) 1827 return false; 1828 } 1829 1830 /** 1831 * Check if a table has DDL metadata available (from CREATE TABLE in same script). 1832 * 1833 * @param table The table to check 1834 * @return true if DDL metadata is available for this table 1835 */ 1836 public static boolean hasTableDdl(TTable table) { 1837 if (table == null) { 1838 return false; 1839 } 1840 gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions(); 1841 return columnDefs != null && columnDefs.size() > 0; 1842 } 1843 1844 /** 1845 * Check DDL verification status for a candidate table. 1846 * 1847 * <p>Returns a tri-state result:</p> 1848 * <ul> 1849 * <li>1 = Column exists in table's DDL</li> 1850 * <li>0 = Column NOT found in table's DDL (DDL available but column missing)</li> 1851 * <li>-1 = Cannot verify (no DDL available for this table)</li> 1852 * </ul> 1853 * 1854 * @param table The candidate table to check 1855 * @param columnName The column name to verify 1856 * @return DDL verification status: 1 (exists), 0 (not found), -1 (no DDL) 1857 */ 1858 public static int getDdlVerificationStatus(TTable table, String columnName) { 1859 if (table == null || columnName == null) { 1860 return -1; 1861 } 1862 1863 gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions(); 1864 if (columnDefs == null || columnDefs.size() == 0) { 1865 return -1; // No DDL available 1866 } 1867 1868 // DDL available - check if column exists 1869 for (int i = 0; i < columnDefs.size(); i++) { 1870 gudusoft.gsqlparser.nodes.TColumnDefinition colDef = columnDefs.getColumn(i); 1871 if (colDef != null && colDef.getColumnName() != null) { 1872 String defColName = colDef.getColumnName().toString(); 1873 if (defColName != null && defColName.equalsIgnoreCase(columnName)) { 1874 return 1; // Column exists in DDL 1875 } 1876 } 1877 } 1878 1879 return 0; // DDL exists but column not found 1880 } 1881 1882 /** 1883 * Get DDL verification status for all candidate tables. 1884 * 1885 * <p>Returns a map from each candidate table to its DDL verification status:</p> 1886 * <ul> 1887 * <li>1 = Column exists in table's DDL</li> 1888 * <li>0 = Column NOT found in table's DDL</li> 1889 * <li>-1 = Cannot verify (no DDL available)</li> 1890 * </ul> 1891 * 1892 * @return Map of candidate tables to their DDL verification status, or empty map if no candidates 1893 */ 1894 public java.util.Map<TTable, Integer> getCandidateTableDdlStatus() { 1895 java.util.Map<TTable, Integer> result = new java.util.LinkedHashMap<>(); 1896 if (candidateTables == null || candidateTables.isEmpty() || exposedName == null) { 1897 return result; 1898 } 1899 1900 for (TTable candidate : candidateTables) { 1901 int status = getDdlVerificationStatus(candidate, exposedName); 1902 result.put(candidate, status); 1903 } 1904 return result; 1905 } 1906}