001package gudusoft.gsqlparser.resolver2.model; 002 003import gudusoft.gsqlparser.EExpressionType; 004import gudusoft.gsqlparser.nodes.TExpression; 005import gudusoft.gsqlparser.nodes.TObjectName; 006import gudusoft.gsqlparser.nodes.TParseTreeNode; 007import gudusoft.gsqlparser.nodes.TResultColumn; 008import gudusoft.gsqlparser.nodes.TTable; 009import gudusoft.gsqlparser.resolver2.inference.EvidenceType; 010import gudusoft.gsqlparser.resolver2.namespace.INamespace; 011import gudusoft.gsqlparser.resolver2.namespace.SubqueryNamespace; 012import gudusoft.gsqlparser.resolver2.namespace.CTENamespace; 013import gudusoft.gsqlparser.resolver2.namespace.UnionNamespace; 014import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 015 016import java.util.Collections; 017import java.util.List; 018 019/** 020 * Represents the source of a column reference. 021 * Tracks where a column comes from, including intermediate transformations 022 * through subqueries and CTEs. 023 * 024 * Design principles: 025 * 1. Immutable - once created, cannot be modified 026 * 2. Recursive - can trace back through subquery/CTE layers 027 * 3. Confidence-scored - supports evidence-based inference 028 */ 029public class ColumnSource { 030 /** The namespace where this column is exposed (e.g., subquery, table) */ 031 private final INamespace sourceNamespace; 032 033 /** The name by which this column is exposed in the namespace */ 034 private final String exposedName; 035 036 /** The AST node where this column is defined (TResultColumn, TTableColumn, etc.) */ 037 private final TParseTreeNode definitionNode; 038 039 /** Location information for the definition */ 040 private final SourceLocation definitionLocation; 041 042 /** 043 * Confidence score [0.0, 1.0]: 044 * - 1.0: Definite (from metadata or explicit definition) 045 * - 0.7-0.9: High confidence inference (strong evidence) 046 * - 0.5-0.7: Medium confidence inference (some evidence) 047 * - 0.0-0.5: Low confidence guess 048 */ 049 private final double confidence; 050 051 /** 052 * Evidence that supports this resolution. 053 * Used for debugging and explaining inference decisions. 054 * 055 * @deprecated Use {@link #evidenceDetail} instead. This field is kept for backward 056 * compatibility and will be derived from evidenceDetail if not explicitly set. 057 */ 058 private final String evidence; 059 060 /** 061 * Structured evidence detail for this resolution. 062 * Provides type-safe evidence with confidence weight and source traceability. 063 * This is the preferred way to access resolution evidence. 064 * 065 * @see ResolutionEvidence 066 */ 067 private final ResolutionEvidence evidenceDetail; 068 069 /** 070 * Override table for traced columns. 071 * When set, getFinalTable() returns this instead of namespace's table. 072 */ 073 private final TTable overrideTable; 074 075 /** 076 * Candidate tables for ambiguous columns. 077 * When a column could come from multiple tables (e.g., SELECT * FROM t1, t2), 078 * this list contains all possible source tables so end users can access them. 079 */ 080 private final List<TTable> candidateTables; 081 082 /** 083 * Field path for deep/record field access (e.g., struct.field.subfield). 084 * 085 * <p>When a column reference includes field access beyond the base column, 086 * this captures the field path. For example, in {@code customer.address.city}, 087 * if base column is {@code customer}, fieldPath contains {@code ["address", "city"]}.</p> 088 * 089 * <p>This field is null or empty for regular column references without field access.</p> 090 * 091 * @see FieldPath 092 */ 093 private final FieldPath fieldPath; 094 095 public ColumnSource(INamespace sourceNamespace, 096 String exposedName, 097 TParseTreeNode definitionNode, 098 double confidence, 099 String evidence) { 100 this(sourceNamespace, exposedName, definitionNode, confidence, evidence, null, null); 101 } 102 103 public ColumnSource(INamespace sourceNamespace, 104 String exposedName, 105 TParseTreeNode definitionNode, 106 double confidence, 107 String evidence, 108 TTable overrideTable) { 109 this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, null); 110 } 111 112 public ColumnSource(INamespace sourceNamespace, 113 String exposedName, 114 TParseTreeNode definitionNode, 115 double confidence, 116 String evidence, 117 TTable overrideTable, 118 List<TTable> candidateTables) { 119 this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, candidateTables, null, null); 120 } 121 122 /** 123 * Full constructor with all fields including ResolutionEvidence. 124 */ 125 public ColumnSource(INamespace sourceNamespace, 126 String exposedName, 127 TParseTreeNode definitionNode, 128 double confidence, 129 String evidence, 130 TTable overrideTable, 131 List<TTable> candidateTables, 132 ResolutionEvidence evidenceDetail) { 133 this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, candidateTables, evidenceDetail, null); 134 } 135 136 /** 137 * Full constructor with all fields including ResolutionEvidence and FieldPath. 138 * 139 * @param sourceNamespace The namespace where this column is exposed 140 * @param exposedName The name by which this column is exposed 141 * @param definitionNode The AST node where this column is defined 142 * @param confidence Confidence score [0.0, 1.0] 143 * @param evidence Evidence string for this resolution 144 * @param overrideTable Override table for traced columns 145 * @param candidateTables Candidate tables for ambiguous columns 146 * @param evidenceDetail Structured evidence detail 147 * @param fieldPath Field path for deep/record field access 148 */ 149 public ColumnSource(INamespace sourceNamespace, 150 String exposedName, 151 TParseTreeNode definitionNode, 152 double confidence, 153 String evidence, 154 TTable overrideTable, 155 List<TTable> candidateTables, 156 ResolutionEvidence evidenceDetail, 157 FieldPath fieldPath) { 158 this.sourceNamespace = sourceNamespace; 159 this.exposedName = exposedName; 160 this.definitionNode = definitionNode; 161 this.definitionLocation = definitionNode != null 162 ? new SourceLocation(definitionNode) 163 : null; 164 this.confidence = Math.max(0.0, Math.min(1.0, confidence)); 165 this.evidence = evidence; 166 this.overrideTable = overrideTable; 167 this.candidateTables = candidateTables != null ? Collections.unmodifiableList(candidateTables) : null; 168 this.fieldPath = fieldPath; 169 // If evidenceDetail not provided, create from legacy evidence 170 if (evidenceDetail != null) { 171 this.evidenceDetail = evidenceDetail; 172 } else if (evidence != null) { 173 this.evidenceDetail = ResolutionEvidence.fromLegacyEvidence(evidence, confidence, definitionNode); 174 } else { 175 this.evidenceDetail = null; 176 } 177 } 178 179 /** 180 * Constructor with ResolutionEvidence (preferred for new code). 181 */ 182 public ColumnSource(INamespace sourceNamespace, 183 String exposedName, 184 TParseTreeNode definitionNode, 185 ResolutionEvidence evidenceDetail) { 186 this(sourceNamespace, exposedName, definitionNode, 187 evidenceDetail != null ? evidenceDetail.getWeight() : 1.0, 188 evidenceDetail != null ? evidenceDetail.toLegacyEvidence() : "metadata", 189 null, null, evidenceDetail); 190 } 191 192 /** 193 * Constructor with ResolutionEvidence and override table. 194 */ 195 public ColumnSource(INamespace sourceNamespace, 196 String exposedName, 197 TParseTreeNode definitionNode, 198 ResolutionEvidence evidenceDetail, 199 TTable overrideTable) { 200 this(sourceNamespace, exposedName, definitionNode, 201 evidenceDetail != null ? evidenceDetail.getWeight() : 1.0, 202 evidenceDetail != null ? evidenceDetail.toLegacyEvidence() : "metadata", 203 overrideTable, null, evidenceDetail); 204 } 205 206 /** 207 * Constructor for definite matches (confidence = 1.0) 208 */ 209 public ColumnSource(INamespace sourceNamespace, 210 String exposedName, 211 TParseTreeNode definitionNode) { 212 this(sourceNamespace, exposedName, definitionNode, 1.0, "metadata"); 213 } 214 215 public INamespace getSourceNamespace() { 216 return sourceNamespace; 217 } 218 219 public String getExposedName() { 220 return exposedName; 221 } 222 223 public TParseTreeNode getDefinitionNode() { 224 return definitionNode; 225 } 226 227 public SourceLocation getDefinitionLocation() { 228 return definitionLocation; 229 } 230 231 public double getConfidence() { 232 return confidence; 233 } 234 235 public String getEvidence() { 236 return evidence; 237 } 238 239 /** 240 * Get the structured evidence detail for this resolution. 241 * 242 * <p>This is the preferred way to access resolution evidence as it provides: 243 * <ul> 244 * <li>Type-safe evidence type (enum)</li> 245 * <li>Confidence weight with clear semantics</li> 246 * <li>Source location for traceability</li> 247 * <li>Human-readable messages</li> 248 * </ul> 249 * 250 * @return The structured evidence detail, or null if not available 251 */ 252 public ResolutionEvidence getEvidenceDetail() { 253 return evidenceDetail; 254 } 255 256 /** 257 * Get the evidence type from the structured evidence detail. 258 * Convenience method for common use cases. 259 * 260 * @return The evidence type, or null if no evidence detail 261 */ 262 public EvidenceType getEvidenceType() { 263 return evidenceDetail != null ? evidenceDetail.getType() : null; 264 } 265 266 /** 267 * Check if this resolution has definite evidence (not inferred). 268 * Definite evidence comes from DDL, metadata, or explicit definitions. 269 * 270 * @return true if evidence is definite 271 */ 272 public boolean hasDefiniteEvidence() { 273 if (evidenceDetail != null) { 274 return evidenceDetail.isDefinite(); 275 } 276 // Fallback: check legacy evidence and confidence 277 if (confidence >= 1.0) { 278 return true; 279 } 280 if (evidence != null) { 281 String lower = evidence.toLowerCase(); 282 return lower.contains("metadata") || lower.contains("ddl") || 283 lower.contains("explicit") || lower.contains("insert_column"); 284 } 285 return false; 286 } 287 288 /** 289 * Get the <b>final</b> physical table this column originates from after tracing 290 * through all subqueries and CTEs. 291 * 292 * <h3>Semantic Difference: getFinalTable() vs TObjectName.getSourceTable()</h3> 293 * <ul> 294 * <li><b>getFinalTable()</b> (this method): The final physical table after 295 * recursively tracing through all subqueries and CTEs. Use this for data lineage.</li> 296 * <li><b>TObjectName.getSourceTable()</b>: The immediate source in the current scope. 297 * For a column from a subquery, this points to the subquery's TTable itself.</li> 298 * </ul> 299 * 300 * <h3>Example</h3> 301 * <pre>{@code 302 * SELECT title FROM (SELECT * FROM books) sub 303 * 304 * For the 'title' column in outer SELECT: 305 * - TObjectName.getSourceTable() → TTable for subquery 'sub' (immediate source) 306 * - ColumnSource.getFinalTable() → TTable for 'books' (final physical table) 307 * }</pre> 308 * 309 * <p>For calculated columns in subqueries (expressions like {@code START_DT - x AS alias}), 310 * this returns null because such calculated columns don't originate from a physical 311 * table - they are derived values computed in the subquery.</p> 312 * 313 * <p>Note: For CTEs, calculated columns ARE the CTE's own columns, so they trace 314 * to the CTE itself (handled by CTENamespace.getFinalTable()).</p> 315 * 316 * @return The physical table, or null if unable to determine or if calculated in subquery 317 * @see gudusoft.gsqlparser.nodes.TObjectName#getSourceTable() 318 */ 319 public TTable getFinalTable() { 320 if (sourceNamespace == null && overrideTable == null) { 321 return null; 322 } 323 324 // For SubqueryNamespace: calculated columns should NOT trace to base table 325 // They are derived values that don't exist in the underlying physical table 326 // Example: SELECT *, expr AS alias FROM table - alias is calculated, not from table 327 // 328 // IMPORTANT: Check BEFORE overrideTable to prevent alias/calculated columns 329 // from being traced to base tables even when overrideTable is explicitly set 330 if (sourceNamespace instanceof SubqueryNamespace && isCalculatedColumn()) { 331 return null; 332 } 333 334 // For CTENamespace: calculated columns ARE the CTE's own columns 335 // They should trace to the CTE itself (referencing table), NOT to underlying base tables 336 // Example: WITH cte AS (SELECT SUM(x) AS total FROM t) SELECT total FROM cte 337 // The 'total' column traces to 'cte', not to 't' 338 if (sourceNamespace instanceof CTENamespace && isCalculatedColumn()) { 339 return ((CTENamespace) sourceNamespace).getReferencingTable(); 340 } 341 342 // For SubqueryNamespace: column aliases should NOT trace to base table 343 // The alias name doesn't exist in the base table's schema 344 // Example: SELECT col AS alias FROM table - alias should not trace to table.alias 345 if (sourceNamespace instanceof SubqueryNamespace && isColumnAlias()) { 346 return null; 347 } 348 349 // For CTENamespace: column aliases ARE the CTE's own columns 350 // They should trace to the CTE itself, NOT to underlying base tables 351 // Example: WITH cte AS (SELECT x AS y FROM t) SELECT y FROM cte 352 // The 'y' column traces to 'cte', not to 't' 353 if (sourceNamespace instanceof CTENamespace && isColumnAlias()) { 354 return ((CTENamespace) sourceNamespace).getReferencingTable(); 355 } 356 357 // For CTENamespace: explicit column names ARE the CTE's own columns 358 // They should trace to the CTE itself, NOT to underlying base tables 359 // Example: WITH cte(c1, c2) AS (SELECT id, name FROM t) SELECT c1 FROM cte 360 // The 'c1' column traces to 'cte', not to 't' (because 'c1' doesn't exist in 't') 361 if (sourceNamespace instanceof CTENamespace && isCTEExplicitColumn()) { 362 return ((CTENamespace) sourceNamespace).getReferencingTable(); 363 } 364 365 // For SubqueryNamespace: passthrough columns that reference aliases should NOT trace 366 // Example: SELECT stat_typ FROM (SELECT stat_typ = stellplatz_typ FROM t) AS b 367 // The stat_typ in outer query references b.stat_typ which is an alias 368 if (sourceNamespace instanceof SubqueryNamespace && isPassthroughToAlias()) { 369 return null; 370 } 371 372 // For SubqueryNamespace: passthrough columns that reference calculated columns should NOT trace 373 // Example: SELECT kko_lfz_9 FROM (SELECT CASE...END AS kko_lfz_9 FROM t) subq 374 // The outer kko_lfz_9 references a calculated column in the subquery 375 if (sourceNamespace instanceof SubqueryNamespace && isPassthroughToCalculatedInSubquery()) { 376 return null; 377 } 378 379 // For CTENamespace: passthrough columns that reference calculated subquery columns should NOT trace 380 // Example: WITH DataCTE AS (SELECT subq.calc_col FROM (SELECT CASE...END AS calc_col FROM t) subq) 381 // The CTE column calc_col references a calculated column in the subquery 382 if (sourceNamespace instanceof CTENamespace && isPassthroughToCalculatedInCTE()) { 383 return null; 384 } 385 386 // For CTE explicit column + star pattern: c1/c2/c3 trace to the star, NOT through the star 387 // Example: WITH cte(c1, c2, c3) AS (SELECT * FROM Employees) 388 // Without metadata, c1 traces to Employees.* (the star), not Employees.c1 (which doesn't exist) 389 // The evidence "cte_explicit_column_via_star" indicates this pattern 390 if ("cte_explicit_column_via_star".equals(evidence)) { 391 return null; 392 } 393 394 // If an override table is set (e.g., for traced columns), use it 395 if (overrideTable != null) { 396 return overrideTable; 397 } 398 399 if (sourceNamespace == null) { 400 return null; 401 } 402 403 // For UnionNamespace: UNION columns don't belong to any specific physical table. 404 // They're a combination of multiple branches. UnionNamespace.getFinalTable() returns 405 // the first branch's table which is incorrect for tracking column origins. 406 if (sourceNamespace instanceof UnionNamespace) { 407 return null; 408 } 409 410 // For SubqueryNamespace without override table: if the subquery has multiple tables 411 // AND no qualified star to identify the source, we can't determine which table 412 // the column comes from. Returning the first table would be incorrect. 413 // Example: FROM CDS_H_PARTNER PAR, (SELECT kategorie ... FROM CDS_H_KUNDEN_OBJEKT) subq 414 // But if there's a qualified star like "ta.*", that identifies the source table. 415 // 416 // IMPORTANT: For Teradata, implicit lateral derived tables (auto-added tables when 417 // a column references an undeclared table in WHERE clause) should be excluded from 418 // the multiple-table count. These are syntactic sugar and shouldn't affect column 419 // resolution to the actual source table. 420 if (sourceNamespace instanceof SubqueryNamespace && overrideTable == null) { 421 SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace; 422 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = subNs.getSubquery(); 423 if (subquery != null && subquery.tables != null) { 424 // Count only real tables (excluding implicit lateral derived tables) 425 int realTableCount = countRealTables(subquery.tables); 426 if (realTableCount > 1) { 427 // Check if there's a qualified star that can identify the source 428 if (!hasQualifiedStar(subquery)) { 429 return null; 430 } 431 } 432 } 433 } 434 435 // For CTENamespace with multiple tables (e.g., JOIN): trace the specific column 436 // to its correct source table using the definitionNode 437 // Example: WITH cte AS (SELECT m.album_id, b.band_name FROM albums m JOIN bands b ...) 438 // When tracing 'band_name', we need to find it comes from 'b' (bands), not 'm' (albums) 439 if (sourceNamespace instanceof CTENamespace) { 440 CTENamespace cteNs = (CTENamespace) sourceNamespace; 441 TTable tracedTable = null; 442 443 if (definitionNode instanceof TResultColumn) { 444 // Direct case: definitionNode is a TResultColumn from the CTE's SELECT list 445 tracedTable = traceColumnThroughCTE(cteNs, (TResultColumn) definitionNode); 446 } else { 447 // Indirect case: The column might be traced through a star column 448 // Try to find the column by name in the CTE chain 449 tracedTable = traceColumnByNameThroughCTE(cteNs, exposedName); 450 } 451 452 if (tracedTable != null) { 453 return tracedTable; 454 } 455 456 // For CTEs with multiple tables, if tracing failed (unqualified column), 457 // return null instead of the first table to avoid incorrect lineage. 458 // Example: WITH cte AS (SELECT musicians.id, musician_name, music_bands.band_name 459 // FROM musicians JOIN ... JOIN music_bands) 460 // The unqualified 'musician_name' cannot be traced to any specific table 461 // without metadata, so we should NOT guess and pick the first table. 462 TSelectSqlStatement cteSelect = cteNs.getSelectStatement(); 463 if (cteSelect != null && cteSelect.tables != null) { 464 int tableCount = countRealTables(cteSelect.tables); 465 if (tableCount > 1) { 466 // Cannot determine which table - don't guess 467 return null; 468 } 469 } 470 } 471 472 return sourceNamespace.getFinalTable(); 473 } 474 475 /** 476 * Trace a column by name through a CTE to find its correct source table. 477 * This handles the case when the column is traced through star columns 478 * and we don't have the direct TResultColumn definition. 479 * 480 * @param cteNs The CTENamespace to trace through 481 * @param columnName The name of the column to find 482 * @return The correct source table, or null if unable to determine 483 */ 484 private TTable traceColumnByNameThroughCTE(CTENamespace cteNs, String columnName) { 485 if (columnName == null || columnName.isEmpty()) { 486 return null; 487 } 488 489 // Get the CTE's SELECT statement 490 TSelectSqlStatement cteSelect = cteNs.getSelectStatement(); 491 if (cteSelect == null) { 492 return null; 493 } 494 495 // First, check if this CTE has explicit columns matching the name 496 TTable result = findColumnInSelectList(cteSelect, columnName); 497 if (result != null) { 498 return result; 499 } 500 501 // If not found directly, check if this CTE uses SELECT * from another CTE 502 if (cteSelect.tables != null) { 503 for (int i = 0; i < cteSelect.tables.size(); i++) { 504 TTable table = cteSelect.tables.getTable(i); 505 if (table == null) continue; 506 507 // If it references another CTE, trace through it 508 if (table.isCTEName() && table.getCTE() != null) { 509 gudusoft.gsqlparser.nodes.TCTE underlyingCte = table.getCTE(); 510 TSelectSqlStatement underlyingSelect = underlyingCte.getSubquery(); 511 if (underlyingSelect != null) { 512 result = findColumnInSelectList(underlyingSelect, columnName); 513 if (result != null) { 514 return result; 515 } 516 } 517 } 518 } 519 } 520 521 return null; 522 } 523 524 /** 525 * Trace a column through a CTE to find its correct source table. 526 * This handles CTEs with JOINs where columns come from different tables, 527 * including CTEs with star columns that reference other CTEs. 528 * 529 * @param cteNs The CTENamespace 530 * @param resultColumn The TResultColumn from the CTE's SELECT list 531 * @return The correct source table, or null if unable to determine 532 */ 533 private TTable traceColumnThroughCTE(CTENamespace cteNs, TResultColumn resultColumn) { 534 if (resultColumn == null || resultColumn.getExpr() == null) { 535 return null; 536 } 537 538 TExpression expr = resultColumn.getExpr(); 539 540 // Check if the expression is a star column (e.g., SELECT * FROM other_cte) 541 // In this case, we need to trace through to the underlying CTE 542 if (expr.getExpressionType() == EExpressionType.simple_object_name_t) { 543 TObjectName colRef = expr.getObjectOperand(); 544 if (colRef != null && "*".equals(colRef.getColumnNameOnly())) { 545 // This is a star column - trace through to find the actual column 546 return traceColumnThroughStarInCTE(cteNs, exposedName); 547 } 548 } 549 550 // Check if the expression is a simple column reference 551 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 552 return null; 553 } 554 555 TObjectName colRef = expr.getObjectOperand(); 556 if (colRef == null) { 557 return null; 558 } 559 560 // Check if the column has a table qualifier (e.g., "b.band_name") 561 String tableQualifier = colRef.getTableString(); 562 if (tableQualifier == null || tableQualifier.isEmpty()) { 563 // No qualifier - can't determine which table 564 return null; 565 } 566 567 // Get the CTE's subquery to find the table with matching alias 568 TSelectSqlStatement cteSubquery = cteNs.getSelectStatement(); 569 if (cteSubquery == null || cteSubquery.tables == null) { 570 return null; 571 } 572 573 // Search for the table with matching alias or name 574 for (int i = 0; i < cteSubquery.tables.size(); i++) { 575 TTable table = cteSubquery.tables.getTable(i); 576 if (table == null) continue; 577 578 // Check alias match 579 String alias = table.getAliasName(); 580 if (alias != null && alias.equalsIgnoreCase(tableQualifier)) { 581 // Found the table - now trace to its final physical table if needed 582 return traceToPhysicalTable(table); 583 } 584 585 // Check table name match (for unaliased tables) 586 String tableName = table.getTableName() != null ? table.getTableName().toString() : null; 587 if (tableName != null && tableName.equalsIgnoreCase(tableQualifier)) { 588 return traceToPhysicalTable(table); 589 } 590 } 591 592 return null; 593 } 594 595 /** 596 * Trace a specific column through a CTE that uses SELECT *. 597 * This finds the underlying CTE that defines the column and traces it to the correct table. 598 * 599 * @param cteNs The CTE namespace with SELECT * 600 * @param columnName The name of the column to trace 601 * @return The correct source table, or null if unable to determine 602 */ 603 private TTable traceColumnThroughStarInCTE(CTENamespace cteNs, String columnName) { 604 if (columnName == null || columnName.isEmpty()) { 605 return null; 606 } 607 608 TSelectSqlStatement cteSubquery = cteNs.getSelectStatement(); 609 if (cteSubquery == null || cteSubquery.tables == null) { 610 return null; 611 } 612 613 // Find the underlying CTE or table that the star column references 614 for (int i = 0; i < cteSubquery.tables.size(); i++) { 615 TTable table = cteSubquery.tables.getTable(i); 616 if (table == null) continue; 617 618 // If it's a CTE reference, look for the column in that CTE 619 if (table.isCTEName() && table.getCTE() != null) { 620 gudusoft.gsqlparser.nodes.TCTE underlyingCte = table.getCTE(); 621 TSelectSqlStatement underlyingSubquery = underlyingCte.getSubquery(); 622 if (underlyingSubquery != null) { 623 // Look for the column in the underlying CTE's SELECT list 624 TTable tracedTable = findColumnInSelectList(underlyingSubquery, columnName); 625 if (tracedTable != null) { 626 return tracedTable; 627 } 628 } 629 } 630 } 631 632 return null; 633 } 634 635 /** 636 * Find a column by name in a SELECT list and trace it to its source table. 637 * 638 * @param selectStmt The SELECT statement to search 639 * @param columnName The column name to find 640 * @return The source table for the column, or null if not found 641 */ 642 private TTable findColumnInSelectList(TSelectSqlStatement selectStmt, String columnName) { 643 if (selectStmt == null || selectStmt.getResultColumnList() == null) { 644 return null; 645 } 646 647 gudusoft.gsqlparser.nodes.TResultColumnList resultList = selectStmt.getResultColumnList(); 648 for (int i = 0; i < resultList.size(); i++) { 649 TResultColumn rc = resultList.getResultColumn(i); 650 if (rc == null) continue; 651 652 // Get the exposed name (alias or column name) 653 String exposedColName = rc.getAliasClause() != null 654 ? rc.getAliasClause().toString() 655 : (rc.getExpr() != null && rc.getExpr().getObjectOperand() != null 656 ? rc.getExpr().getObjectOperand().getColumnNameOnly() 657 : null); 658 659 if (exposedColName != null && exposedColName.equalsIgnoreCase(columnName)) { 660 // Found the column - trace it to its source table 661 TExpression expr = rc.getExpr(); 662 if (expr != null && expr.getExpressionType() == EExpressionType.simple_object_name_t) { 663 TObjectName colRef = expr.getObjectOperand(); 664 if (colRef != null) { 665 String tableQualifier = colRef.getTableString(); 666 if (tableQualifier != null && !tableQualifier.isEmpty()) { 667 // Find the table with this qualifier in the FROM clause 668 if (selectStmt.tables != null) { 669 for (int j = 0; j < selectStmt.tables.size(); j++) { 670 TTable table = selectStmt.tables.getTable(j); 671 if (table == null) continue; 672 673 String alias = table.getAliasName(); 674 if (alias != null && alias.equalsIgnoreCase(tableQualifier)) { 675 return traceToPhysicalTable(table); 676 } 677 678 String tableName = table.getTableName() != null 679 ? table.getTableName().toString() : null; 680 if (tableName != null && tableName.equalsIgnoreCase(tableQualifier)) { 681 return traceToPhysicalTable(table); 682 } 683 } 684 } 685 } 686 } 687 } 688 } 689 } 690 691 return null; 692 } 693 694 /** 695 * Trace a table to its underlying physical table. 696 * Handles CTEs, subqueries, and JOINs. 697 */ 698 private TTable traceToPhysicalTable(TTable table) { 699 if (table == null) { 700 return null; 701 } 702 703 // If it's already a physical table, return it 704 if (table.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !table.isCTEName()) { 705 return table; 706 } 707 708 // If it's a CTE reference, trace through the CTE 709 if (table.isCTEName() && table.getCTE() != null) { 710 // Use a simple approach - get the first physical table from the CTE 711 // This could be enhanced to trace specific columns through nested CTEs 712 gudusoft.gsqlparser.nodes.TCTE nestedCte = table.getCTE(); 713 if (nestedCte.getSubquery() != null && nestedCte.getSubquery().tables != null) { 714 for (int i = 0; i < nestedCte.getSubquery().tables.size(); i++) { 715 TTable nestedTable = nestedCte.getSubquery().tables.getTable(i); 716 TTable physical = traceToPhysicalTable(nestedTable); 717 if (physical != null) { 718 return physical; 719 } 720 } 721 } 722 } 723 724 // If it's a subquery, trace through it 725 if (table.getSubquery() != null) { 726 SubqueryNamespace nestedNs = new SubqueryNamespace( 727 table.getSubquery(), 728 table.getAliasName(), 729 null // nameMatcher not needed for simple tracing 730 ); 731 return nestedNs.getFinalTable(); 732 } 733 734 return null; 735 } 736 737 /** 738 * Get all physical tables that this column might originate from. 739 * 740 * <p>For columns from UNION queries, this returns tables from ALL branches, 741 * not just the first one. This is essential for proper lineage tracking 742 * where a column like {@code actor_id} in a UNION query should be linked 743 * to {@code actor.actor_id}, {@code actor2.actor_id}, {@code actor3.actor_id}.</p> 744 * 745 * <p>For regular single-table sources, this returns a single-element list 746 * with the same table as {@link #getFinalTable()}.</p> 747 * 748 * @return List of all physical tables, or empty list if unable to determine 749 */ 750 public java.util.List<TTable> getAllFinalTables() { 751 // If this ColumnSource has explicit candidateTables set (e.g., from UNION inference), 752 // use those instead of delegating to namespace. This is critical for UNION queries 753 // where only branches with SELECT * should contribute candidate tables for inferred columns. 754 // An EMPTY list means "no matching tables" - return it as-is without delegating. 755 // A NULL means "not determined" - delegate to namespace. 756 if (candidateTables != null) { 757 return candidateTables; 758 } 759 760 if (sourceNamespace == null) { 761 if (overrideTable != null) { 762 return java.util.Collections.singletonList(overrideTable); 763 } 764 return java.util.Collections.emptyList(); 765 } 766 767 // For calculated columns and aliases in SubqueryNamespace, don't trace 768 if (sourceNamespace instanceof SubqueryNamespace) { 769 if (isCalculatedColumn() || isColumnAlias()) { 770 return java.util.Collections.emptyList(); 771 } 772 } 773 774 // For CTENamespace calculated/alias/explicit columns, trace to CTE itself 775 if (sourceNamespace instanceof CTENamespace) { 776 if (isCalculatedColumn() || isColumnAlias() || isCTEExplicitColumn()) { 777 TTable cteTable = ((CTENamespace) sourceNamespace).getReferencingTable(); 778 if (cteTable != null) { 779 return java.util.Collections.singletonList(cteTable); 780 } 781 return java.util.Collections.emptyList(); 782 } 783 } 784 785 // Delegate to namespace - handles UNION queries via UnionNamespace.getAllFinalTables() 786 return sourceNamespace.getAllFinalTables(); 787 } 788 789 /** 790 * Check if this column is a passthrough reference to an underlying alias. 791 * 792 * <p>A passthrough column is a simple column reference in a subquery that 793 * references another column from its FROM clause. If that underlying column 794 * is an alias, then this passthrough should not trace to the base table.</p> 795 * 796 * <p>Example: In {@code SELECT stat_typ FROM (SELECT stat_typ = col FROM t) AS b}, 797 * the outer {@code stat_typ} is a passthrough to {@code b.stat_typ}, which is an alias.</p> 798 * 799 * @return true if this is a passthrough to an alias 800 */ 801 private boolean isPassthroughToAlias() { 802 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 803 return false; 804 } 805 806 TResultColumn rc = (TResultColumn) definitionNode; 807 TExpression expr = rc.getExpr(); 808 if (expr == null) { 809 return false; 810 } 811 812 // Only check simple column references (passthroughs) 813 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 814 return false; 815 } 816 817 // If this column itself has an alias that differs, it's already handled by isColumnAlias() 818 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 819 return false; 820 } 821 822 // Get the column name being referenced 823 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 824 if (objName == null) { 825 return false; 826 } 827 String columnName = objName.getColumnNameOnly(); 828 if (columnName == null || columnName.isEmpty()) { 829 return false; 830 } 831 832 // Resolve this column in the subquery's FROM scope to find the underlying ColumnSource 833 if (sourceNamespace instanceof SubqueryNamespace) { 834 SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace; 835 ColumnSource underlyingSource = subNs.resolveColumnInFromScope(columnName); 836 if (underlyingSource != null) { 837 // Check if the underlying column is an alias or calculated 838 if (underlyingSource.isColumnAlias() || underlyingSource.isCalculatedColumn()) { 839 return true; 840 } 841 // Recursively check if it's a passthrough to alias 842 if (underlyingSource.isPassthroughToAlias()) { 843 return true; 844 } 845 } 846 } 847 848 return false; 849 } 850 851 /** 852 * Check if this subquery column is a passthrough reference to a calculated column. 853 * 854 * <p>A subquery column is a passthrough to calculated if:</p> 855 * <ol> 856 * <li>The column definition is a simple column reference (e.g., kko_lfz_9)</li> 857 * <li>The referenced column in the FROM scope is calculated (CASE, function, etc.)</li> 858 * </ol> 859 * 860 * <p>Example:</p> 861 * <pre> 862 * SELECT kko_lfz_9 AS KKO_LFZ_9 863 * FROM (SELECT CASE WHEN... END AS kko_lfz_9 FROM t) subq 864 * </pre> 865 * <p>Here, kko_lfz_9 in the outer query is a passthrough to a calculated column in subq.</p> 866 * 867 * <p>This differs from {@link #isPassthroughToAlias()} which skips columns with aliases. 868 * Here we check even aliased passthroughs to see if they reference calculated columns.</p> 869 * 870 * @return true if this is a passthrough to a calculated column in a subquery 871 */ 872 private boolean isPassthroughToCalculatedInSubquery() { 873 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 874 return false; 875 } 876 877 TResultColumn rc = (TResultColumn) definitionNode; 878 TExpression expr = rc.getExpr(); 879 if (expr == null) { 880 return false; 881 } 882 883 // Only check simple column references (passthroughs) 884 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 885 return false; 886 } 887 888 // Get the column name being referenced 889 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 890 if (objName == null) { 891 return false; 892 } 893 String columnName = objName.getColumnNameOnly(); 894 if (columnName == null || columnName.isEmpty()) { 895 return false; 896 } 897 898 // Resolve this column in the subquery's FROM scope to find the underlying ColumnSource 899 if (sourceNamespace instanceof SubqueryNamespace) { 900 SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace; 901 ColumnSource underlyingSource = subNs.resolveColumnInFromScope(columnName); 902 if (underlyingSource != null) { 903 // Check if the underlying column is calculated 904 if (underlyingSource.isCalculatedColumn()) { 905 return true; 906 } 907 // Recursively check if it's a passthrough to calculated 908 if (underlyingSource.isPassthroughToCalculatedInSubquery()) { 909 return true; 910 } 911 } 912 } 913 914 return false; 915 } 916 917 /** 918 * Check if this CTE column is a passthrough reference to a calculated column in a subquery or nested CTE. 919 * 920 * <p>A CTE column is a passthrough to calculated if:</p> 921 * <ol> 922 * <li>The column definition is a simple qualified column reference (e.g., subq.calc_col or cte.calc_col)</li> 923 * <li>The qualifier refers to a subquery or CTE in the CTE's body</li> 924 * <li>The referenced column in that subquery/CTE is calculated (CASE, function, etc.)</li> 925 * </ol> 926 * 927 * <p>Example with subquery:</p> 928 * <pre> 929 * WITH DataCTE AS ( 930 * SELECT ErrorCountsCTE.ErrorSeverityCategory -- passthrough 931 * FROM (SELECT CASE...END AS ErrorSeverityCategory FROM t) ErrorCountsCTE 932 * ) 933 * </pre> 934 * 935 * <p>Example with nested CTE:</p> 936 * <pre> 937 * WITH attendance_summary AS ( 938 * SELECT date_trunc('month', attendance_date) as month FROM attendance 939 * ) 940 * WITH outer_cte AS ( 941 * SELECT a.month FROM attendance_summary a -- passthrough to calculated in nested CTE 942 * ) 943 * </pre> 944 * 945 * @return true if this is a passthrough to a calculated column in a CTE 946 */ 947 private boolean isPassthroughToCalculatedInCTE() { 948 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 949 return false; 950 } 951 952 TResultColumn rc = (TResultColumn) definitionNode; 953 TExpression expr = rc.getExpr(); 954 if (expr == null) { 955 return false; 956 } 957 958 // Only check simple qualified column references (passthroughs like subq.column) 959 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 960 return false; 961 } 962 963 // Get the column reference 964 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 965 if (objName == null) { 966 return false; 967 } 968 969 // Must have a table qualifier (e.g., "ErrorCountsCTE" in "ErrorCountsCTE.ErrorSeverityCategory") 970 String tableQualifier = objName.getTableString(); 971 if (tableQualifier == null || tableQualifier.isEmpty()) { 972 return false; 973 } 974 975 String columnName = objName.getColumnNameOnly(); 976 if (columnName == null || columnName.isEmpty()) { 977 return false; 978 } 979 980 // Get the CTE's subquery to find the referenced subquery alias 981 if (!(sourceNamespace instanceof CTENamespace)) { 982 return false; 983 } 984 985 CTENamespace cteNs = (CTENamespace) sourceNamespace; 986 gudusoft.gsqlparser.nodes.TCTE cte = cteNs.getCTE(); 987 if (cte == null || cte.getSubquery() == null) { 988 return false; 989 } 990 991 // Find the subquery/table with this alias in the CTE's body 992 gudusoft.gsqlparser.stmt.TSelectSqlStatement cteBody = cte.getSubquery(); 993 TTable referencedTable = findTableByAlias(cteBody, tableQualifier); 994 if (referencedTable == null) { 995 return false; 996 } 997 998 // Case 1: Referenced table is a subquery 999 if (referencedTable.getSubquery() != null) { 1000 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = referencedTable.getSubquery(); 1001 return isCalculatedColumnInSelect(subquery, columnName); 1002 } 1003 1004 // Case 2: Referenced table is a CTE reference 1005 if (referencedTable.isCTEName() && referencedTable.getCTE() != null) { 1006 gudusoft.gsqlparser.nodes.TCTE referencedCTE = referencedTable.getCTE(); 1007 if (referencedCTE.getSubquery() != null) { 1008 return isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName); 1009 } 1010 } 1011 1012 return false; 1013 } 1014 1015 /** 1016 * Find a table in a SELECT statement by its alias. 1017 */ 1018 private TTable findTableByAlias(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String alias) { 1019 if (select == null || select.tables == null || alias == null) { 1020 return null; 1021 } 1022 1023 for (int i = 0; i < select.tables.size(); i++) { 1024 TTable table = select.tables.getTable(i); 1025 if (table != null) { 1026 String tableAlias = table.getAliasName(); 1027 if (tableAlias != null && tableAlias.equalsIgnoreCase(alias)) { 1028 return table; 1029 } 1030 // Also check table name for non-aliased references 1031 if (tableAlias == null && table.getTableName() != null) { 1032 String tableName = table.getTableName().toString(); 1033 if (tableName != null && tableName.equalsIgnoreCase(alias)) { 1034 return table; 1035 } 1036 } 1037 } 1038 } 1039 return null; 1040 } 1041 1042 /** 1043 * Check if a column in a SELECT statement is calculated (not a simple column reference). 1044 */ 1045 private boolean isCalculatedColumnInSelect(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) { 1046 if (select == null || select.getResultColumnList() == null || columnName == null) { 1047 return false; 1048 } 1049 1050 for (int i = 0; i < select.getResultColumnList().size(); i++) { 1051 TResultColumn rc = select.getResultColumnList().getResultColumn(i); 1052 if (rc == null) continue; 1053 1054 // Get the column name for this result column 1055 String rcName = null; 1056 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1057 rcName = rc.getAliasClause().getAliasName().toString(); 1058 } else if (rc.getExpr() != null && 1059 rc.getExpr().getExpressionType() == EExpressionType.simple_object_name_t && 1060 rc.getExpr().getObjectOperand() != null) { 1061 rcName = rc.getExpr().getObjectOperand().getColumnNameOnly(); 1062 } 1063 1064 if (rcName != null && rcName.equalsIgnoreCase(columnName)) { 1065 // Found the column - check if it's calculated 1066 TExpression expr = rc.getExpr(); 1067 if (expr != null && expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1068 // Non-simple expression = calculated 1069 return true; 1070 } 1071 } 1072 } 1073 return false; 1074 } 1075 1076 /** 1077 * Check if this column source represents a calculated expression. 1078 * 1079 * <p>A column is calculated if its definition is a TResultColumn with 1080 * a non-simple expression (not a direct column reference or star).</p> 1081 * 1082 * <p>For inferred columns (via star expansion), we trace back to the 1083 * source CTE/subquery to check if the original column is calculated.</p> 1084 * 1085 * @return true if this is a calculated column 1086 */ 1087 public boolean isCalculatedColumn() { 1088 if (definitionNode == null) { 1089 // For inferred columns through star expansion, check if the underlying 1090 // column in the source CTE/subquery is calculated 1091 return isInferredFromCalculatedColumn(); 1092 } 1093 1094 if (!(definitionNode instanceof TResultColumn)) { 1095 return false; 1096 } 1097 1098 TResultColumn rc = (TResultColumn) definitionNode; 1099 TExpression expr = rc.getExpr(); 1100 if (expr == null) { 1101 return false; 1102 } 1103 1104 EExpressionType exprType = expr.getExpressionType(); 1105 1106 // Simple column reference - NOT calculated (passthrough) 1107 if (exprType == EExpressionType.simple_object_name_t) { 1108 return false; 1109 } 1110 1111 // Star column - NOT calculated (passthrough) 1112 String colText = rc.toString(); 1113 if (colText != null && colText.endsWith("*")) { 1114 return false; 1115 } 1116 1117 // SQL Server proprietary column alias (col = expr) 1118 if (exprType == EExpressionType.sqlserver_proprietary_column_alias_t) { 1119 if (expr.getRightOperand() != null && 1120 expr.getRightOperand().getExpressionType() == EExpressionType.simple_object_name_t) { 1121 return false; 1122 } 1123 } 1124 1125 // Any other expression type is calculated 1126 return true; 1127 } 1128 1129 /** 1130 * Check if this is an inferred column (via star expansion) that originates from 1131 * a calculated column in the source CTE/subquery. 1132 * 1133 * <p>When a column is resolved through star expansion (e.g., SELECT * FROM CTE), 1134 * the definitionNode is null. We need to trace back to the source namespace 1135 * to check if the original column is calculated.</p> 1136 * 1137 * @return true if this inferred column traces back to a calculated column 1138 */ 1139 private boolean isInferredFromCalculatedColumn() { 1140 // Only check for inferred columns (evidence contains "auto_inferred") 1141 if (evidence == null || !evidence.contains("auto_inferred")) { 1142 return false; 1143 } 1144 1145 // Need the source namespace and column name to trace 1146 if (sourceNamespace == null || exposedName == null) { 1147 return false; 1148 } 1149 1150 // For CTE namespace, check if the column is calculated in the CTE's SELECT list 1151 if (sourceNamespace instanceof CTENamespace) { 1152 CTENamespace cteNs = (CTENamespace) sourceNamespace; 1153 gudusoft.gsqlparser.nodes.TCTE cte = cteNs.getCTE(); 1154 if (cte != null && cte.getSubquery() != null) { 1155 // First check the CTE's direct SELECT list 1156 if (isCalculatedColumnInSelect(cte.getSubquery(), exposedName)) { 1157 return true; 1158 } 1159 1160 // If the CTE has a star column, trace through to referenced CTEs 1161 if (cteNs.hasStarColumn()) { 1162 return isCalculatedInCTEChain(cte.getSubquery(), exposedName); 1163 } 1164 } 1165 } 1166 1167 // For Subquery namespace, check if the column is calculated in the subquery's SELECT list 1168 if (sourceNamespace instanceof SubqueryNamespace) { 1169 SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace; 1170 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = subNs.getSubquery(); 1171 if (subquery != null) { 1172 // First check the subquery's direct SELECT list 1173 if (isCalculatedColumnInSelect(subquery, exposedName)) { 1174 return true; 1175 } 1176 1177 // If the subquery has a star column, trace through to source tables 1178 if (subNs.hasStarColumn()) { 1179 return isCalculatedInSubqueryChain(subquery, exposedName); 1180 } 1181 } 1182 } 1183 1184 return false; 1185 } 1186 1187 /** 1188 * Check if a column is calculated by tracing through CTE references. 1189 * This handles cases like Stage4 -> Stage3 -> Stage2 where the column 1190 * is calculated at some intermediate level. 1191 */ 1192 private boolean isCalculatedInCTEChain(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) { 1193 if (select == null || select.tables == null) { 1194 return false; 1195 } 1196 1197 // Look for CTE references in the FROM clause 1198 for (int i = 0; i < select.tables.size(); i++) { 1199 TTable table = select.tables.getTable(i); 1200 if (table != null && table.isCTEName() && table.getCTE() != null) { 1201 gudusoft.gsqlparser.nodes.TCTE referencedCTE = table.getCTE(); 1202 if (referencedCTE.getSubquery() != null) { 1203 // Check if the column is calculated in this CTE 1204 if (isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName)) { 1205 return true; 1206 } 1207 // Recursively check the CTE chain 1208 if (isCalculatedInCTEChain(referencedCTE.getSubquery(), columnName)) { 1209 return true; 1210 } 1211 } 1212 } 1213 } 1214 return false; 1215 } 1216 1217 /** 1218 * Check if a column is calculated by tracing through subquery references. 1219 */ 1220 private boolean isCalculatedInSubqueryChain(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) { 1221 if (select == null || select.tables == null) { 1222 return false; 1223 } 1224 1225 // Look for subquery tables in the FROM clause 1226 for (int i = 0; i < select.tables.size(); i++) { 1227 TTable table = select.tables.getTable(i); 1228 if (table != null && table.getSubquery() != null) { 1229 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = table.getSubquery(); 1230 // Check if the column is calculated in this subquery 1231 if (isCalculatedColumnInSelect(subquery, columnName)) { 1232 return true; 1233 } 1234 // Recursively check the subquery chain 1235 if (isCalculatedInSubqueryChain(subquery, columnName)) { 1236 return true; 1237 } 1238 } 1239 // Also check CTE references within subqueries 1240 if (table != null && table.isCTEName() && table.getCTE() != null) { 1241 gudusoft.gsqlparser.nodes.TCTE referencedCTE = table.getCTE(); 1242 if (referencedCTE.getSubquery() != null) { 1243 if (isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName)) { 1244 return true; 1245 } 1246 if (isCalculatedInCTEChain(referencedCTE.getSubquery(), columnName)) { 1247 return true; 1248 } 1249 } 1250 } 1251 } 1252 return false; 1253 } 1254 1255 /** 1256 * Check if this column source represents a column alias (renamed column). 1257 * 1258 * <p>A column is an alias if it's a simple column reference in a subquery 1259 * that has been given a different name via AS or NAMED. For example:</p> 1260 * <ul> 1261 * <li>{@code SELECT col AS alias FROM table} - alias is different from col</li> 1262 * <li>{@code SELECT col (NAMED alias) FROM table} - Teradata NAMED syntax</li> 1263 * <li>{@code SELECT alias = col FROM table} - SQL Server proprietary syntax</li> 1264 * </ul> 1265 * 1266 * <p>Column aliases should NOT trace to base tables because the alias name 1267 * doesn't exist as an actual column in the base table.</p> 1268 * 1269 * @return true if this is a column alias with a different name than the original 1270 */ 1271 public boolean isColumnAlias() { 1272 if (definitionNode == null) { 1273 return false; 1274 } 1275 1276 if (!(definitionNode instanceof TResultColumn)) { 1277 return false; 1278 } 1279 1280 TResultColumn rc = (TResultColumn) definitionNode; 1281 TExpression expr = rc.getExpr(); 1282 if (expr == null) { 1283 return false; 1284 } 1285 1286 EExpressionType exprType = expr.getExpressionType(); 1287 1288 // Handle SQL Server proprietary alias syntax: alias = column 1289 // Example: stat_typ = stellplatz_typ 1290 if (exprType == EExpressionType.sqlserver_proprietary_column_alias_t) { 1291 TExpression rightExpr = expr.getRightOperand(); 1292 TExpression leftExpr = expr.getLeftOperand(); 1293 // Only if right side is a simple column reference 1294 if (rightExpr != null && leftExpr != null && 1295 rightExpr.getExpressionType() == EExpressionType.simple_object_name_t) { 1296 gudusoft.gsqlparser.nodes.TObjectName rightObjName = rightExpr.getObjectOperand(); 1297 gudusoft.gsqlparser.nodes.TObjectName leftObjName = leftExpr.getObjectOperand(); 1298 if (rightObjName != null && leftObjName != null) { 1299 String origName = rightObjName.getColumnNameOnly(); 1300 String aliasName = leftObjName.getColumnNameOnly(); 1301 // If alias name differs from original column name, it's an alias 1302 if (origName != null && aliasName != null && 1303 !origName.equalsIgnoreCase(aliasName)) { 1304 return true; 1305 } 1306 } 1307 } 1308 return false; 1309 } 1310 1311 // Standard alias syntax: column AS alias 1312 // Only applies to simple column references 1313 if (exprType != EExpressionType.simple_object_name_t) { 1314 return false; 1315 } 1316 1317 // Check if there's an alias that differs from the column name 1318 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1319 String aliasName = rc.getAliasClause().getAliasName().toString(); 1320 if (aliasName != null && !aliasName.isEmpty()) { 1321 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 1322 if (objName != null) { 1323 String origName = objName.getColumnNameOnly(); 1324 // If alias name differs from original name, it's an alias 1325 if (origName != null && !origName.equalsIgnoreCase(aliasName)) { 1326 return true; 1327 } 1328 } 1329 } 1330 } 1331 1332 return false; 1333 } 1334 1335 /** 1336 * Check if this column is a CTE explicit column with a different name than the underlying column. 1337 * 1338 * <p>A CTE explicit column is one defined in the CTE's column list that maps to a 1339 * different column name in the CTE's SELECT list. For example:</p> 1340 * <pre> 1341 * WITH cte(c1, c2) AS (SELECT id, name FROM users) 1342 * SELECT c1 FROM cte -- c1 maps to 'id', names differ 1343 * </pre> 1344 * 1345 * <p>CTE explicit columns should NOT trace to base tables because the explicit 1346 * column name (c1) doesn't exist as an actual column in the base table (users).</p> 1347 * 1348 * @return true if this is a CTE explicit column with a different name 1349 */ 1350 public boolean isCTEExplicitColumn() { 1351 // Must be from a CTENamespace 1352 if (!(sourceNamespace instanceof CTENamespace)) { 1353 return false; 1354 } 1355 1356 // Check evidence for explicit column marker 1357 if (!"cte_explicit_column".equals(evidence)) { 1358 return false; 1359 } 1360 1361 // Get the underlying column name from the definition node 1362 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 1363 return false; 1364 } 1365 1366 TResultColumn rc = (TResultColumn) definitionNode; 1367 TExpression expr = rc.getExpr(); 1368 if (expr == null) { 1369 return false; 1370 } 1371 1372 // Get the column name from the SELECT list item 1373 String underlyingName = null; 1374 1375 // Check for alias first 1376 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1377 underlyingName = rc.getAliasClause().getAliasName().toString(); 1378 } 1379 // Then check for simple column reference 1380 else if (expr.getExpressionType() == EExpressionType.simple_object_name_t && 1381 expr.getObjectOperand() != null) { 1382 underlyingName = expr.getObjectOperand().getColumnNameOnly(); 1383 } 1384 1385 // If we can't determine the underlying name, assume it's different 1386 // (calculated expressions, etc. are definitely different from explicit column names) 1387 if (underlyingName == null) { 1388 return true; 1389 } 1390 1391 // If the exposed name differs from the underlying column name, it's an explicit column rename 1392 return !exposedName.equalsIgnoreCase(underlyingName); 1393 } 1394 1395 /** 1396 * Get the override table, if set. 1397 */ 1398 public TTable getOverrideTable() { 1399 return overrideTable; 1400 } 1401 1402 /** 1403 * Get the candidate tables for ambiguous columns. 1404 * 1405 * <p>When a column could come from multiple tables (e.g., SELECT * FROM t1, t2), 1406 * this returns all possible source tables. End users can iterate through this 1407 * list to understand all potential sources for the column.</p> 1408 * 1409 * @return List of candidate tables, or empty list if not ambiguous 1410 */ 1411 public List<TTable> getCandidateTables() { 1412 return candidateTables != null ? candidateTables : Collections.emptyList(); 1413 } 1414 1415 /** 1416 * Check if this column has multiple candidate tables (is ambiguous). 1417 * 1418 * @return true if there are multiple candidate tables 1419 */ 1420 public boolean isAmbiguous() { 1421 return candidateTables != null && candidateTables.size() > 1; 1422 } 1423 1424 /** 1425 * Get the field path for deep/record field access. 1426 * 1427 * <p>When a column reference includes field access beyond the base column, 1428 * this returns the field path. For example, in {@code customer.address.city}, 1429 * if base column is {@code customer}, this returns a FieldPath with 1430 * segments {@code ["address", "city"]}.</p> 1431 * 1432 * @return The field path, or null if no field access 1433 */ 1434 public FieldPath getFieldPath() { 1435 return fieldPath; 1436 } 1437 1438 /** 1439 * Check if this column source has a field path (deep/record field access). 1440 * 1441 * @return true if a non-empty field path exists 1442 */ 1443 public boolean hasFieldPath() { 1444 return fieldPath != null && !fieldPath.isEmpty(); 1445 } 1446 1447 /** 1448 * Check if this is a struct field access (has evidence "struct_field_access"). 1449 * 1450 * <p>This is a convenience method for checking if this column source represents 1451 * a struct/record field dereference operation.</p> 1452 * 1453 * @return true if this is a struct field access 1454 */ 1455 public boolean isStructFieldAccess() { 1456 return "struct_field_access".equals(evidence); 1457 } 1458 1459 /** 1460 * Checks if this is a definite resolution (confidence = 1.0) 1461 */ 1462 public boolean isDefinite() { 1463 return confidence >= 1.0; 1464 } 1465 1466 /** 1467 * Checks if this is an inferred resolution (confidence < 1.0) 1468 */ 1469 public boolean isInferred() { 1470 return confidence < 1.0; 1471 } 1472 1473 @Override 1474 public String toString() { 1475 StringBuilder sb = new StringBuilder(); 1476 sb.append(exposedName); 1477 if (sourceNamespace != null) { 1478 sb.append(" from ").append(sourceNamespace.getDisplayName()); 1479 } 1480 if (confidence < 1.0) { 1481 sb.append(String.format(" (confidence: %.2f)", confidence)); 1482 } 1483 return sb.toString(); 1484 } 1485 1486 /** 1487 * Creates a copy with updated confidence and evidence. 1488 * Used when merging or updating inference results. 1489 * 1490 * @deprecated Use {@link #withEvidence(ResolutionEvidence)} instead 1491 */ 1492 public ColumnSource withConfidence(double newConfidence, String newEvidence) { 1493 return new ColumnSource( 1494 this.sourceNamespace, 1495 this.exposedName, 1496 this.definitionNode, 1497 newConfidence, 1498 newEvidence, 1499 this.overrideTable, 1500 this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null, 1501 null, // will create from legacy evidence 1502 this.fieldPath 1503 ); 1504 } 1505 1506 /** 1507 * Creates a copy with updated ResolutionEvidence. 1508 * This is the preferred method for updating evidence in new code. 1509 * 1510 * @param newEvidence The new evidence detail 1511 * @return A new ColumnSource with updated evidence 1512 */ 1513 public ColumnSource withEvidence(ResolutionEvidence newEvidence) { 1514 return new ColumnSource( 1515 this.sourceNamespace, 1516 this.exposedName, 1517 this.definitionNode, 1518 newEvidence != null ? newEvidence.getWeight() : this.confidence, 1519 newEvidence != null ? newEvidence.toLegacyEvidence() : this.evidence, 1520 this.overrideTable, 1521 this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null, 1522 newEvidence, 1523 this.fieldPath 1524 ); 1525 } 1526 1527 /** 1528 * Creates a copy with candidate tables. 1529 * Used when a column could come from multiple tables. 1530 */ 1531 public ColumnSource withCandidateTables(List<TTable> candidates) { 1532 return new ColumnSource( 1533 this.sourceNamespace, 1534 this.exposedName, 1535 this.definitionNode, 1536 this.confidence, 1537 this.evidence, 1538 this.overrideTable, 1539 candidates != null ? new java.util.ArrayList<>(candidates) : null, 1540 this.evidenceDetail, 1541 this.fieldPath 1542 ); 1543 } 1544 1545 /** 1546 * Creates a copy with a field path for deep/record field access. 1547 * 1548 * <p>This method is used when resolving struct/record field access patterns 1549 * like {@code customer.address.city}. The base column is preserved as the 1550 * exposedName, and the field path captures the remaining segments.</p> 1551 * 1552 * @param newFieldPath The field path segments (beyond the base column) 1553 * @return A new ColumnSource with the field path set 1554 */ 1555 public ColumnSource withFieldPath(FieldPath newFieldPath) { 1556 return new ColumnSource( 1557 this.sourceNamespace, 1558 this.exposedName, 1559 this.definitionNode, 1560 this.confidence, 1561 this.evidence, 1562 this.overrideTable, 1563 this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null, 1564 this.evidenceDetail, 1565 newFieldPath 1566 ); 1567 } 1568 1569 /** 1570 * Creates a copy with a field path from a list of segments. 1571 * 1572 * <p>Convenience method for creating a ColumnSource with a field path 1573 * from a list of string segments.</p> 1574 * 1575 * @param segments The field path segments 1576 * @return A new ColumnSource with the field path set 1577 */ 1578 public ColumnSource withFieldPath(List<String> segments) { 1579 return withFieldPath(FieldPath.of(segments)); 1580 } 1581 1582 /** 1583 * Creates a copy with field path and updated evidence. 1584 * 1585 * <p>This method is used when resolving struct field access, combining 1586 * both the field path and the struct_field_access evidence marker.</p> 1587 * 1588 * @param newFieldPath The field path segments 1589 * @param newEvidence The evidence string (e.g., "struct_field_access") 1590 * @return A new ColumnSource with field path and evidence updated 1591 */ 1592 public ColumnSource withFieldPath(FieldPath newFieldPath, String newEvidence) { 1593 return new ColumnSource( 1594 this.sourceNamespace, 1595 this.exposedName, 1596 this.definitionNode, 1597 this.confidence, 1598 newEvidence, 1599 this.overrideTable, 1600 this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null, 1601 null, // will create from legacy evidence 1602 newFieldPath 1603 ); 1604 } 1605 1606 /** 1607 * Count the number of "real" tables in a table list, excluding implicit lateral derived tables. 1608 * 1609 * <p>Teradata supports implicit lateral derived tables, which are auto-added when a column 1610 * references an undeclared table in the WHERE clause. These should not be counted when 1611 * determining if a subquery has multiple tables for column resolution purposes.</p> 1612 * 1613 * @param tables The table list to count 1614 * @return The number of real (non-implicit) tables 1615 */ 1616 private static int countRealTables(gudusoft.gsqlparser.nodes.TTableList tables) { 1617 if (tables == null) { 1618 return 0; 1619 } 1620 int count = 0; 1621 for (int i = 0; i < tables.size(); i++) { 1622 TTable table = tables.getTable(i); 1623 if (table != null && table.getEffectType() != gudusoft.gsqlparser.ETableEffectType.tetImplicitLateralDerivedTable) { 1624 count++; 1625 } 1626 } 1627 return count; 1628 } 1629 1630 /** 1631 * Check if a SELECT statement has a qualified star column (e.g., ta.*, tb.*). 1632 * Qualified stars identify which table columns come from in multi-table subqueries. 1633 */ 1634 private static boolean hasQualifiedStar(gudusoft.gsqlparser.stmt.TSelectSqlStatement select) { 1635 if (select == null || select.getResultColumnList() == null) { 1636 return false; 1637 } 1638 gudusoft.gsqlparser.nodes.TResultColumnList resultCols = select.getResultColumnList(); 1639 for (int i = 0; i < resultCols.size(); i++) { 1640 TResultColumn rc = resultCols.getResultColumn(i); 1641 if (rc != null) { 1642 String colStr = rc.toString().trim(); 1643 // Qualified star has format "alias.*" or "table.*" 1644 if (colStr.endsWith("*") && colStr.contains(".")) { 1645 return true; 1646 } 1647 } 1648 } 1649 return false; 1650 } 1651 1652 /** 1653 * Check if a column exists in a table's DDL definition. 1654 * 1655 * <p>This method checks the table's column definitions (from CREATE TABLE statements 1656 * parsed in the same script) to verify if the column name is defined.</p> 1657 * 1658 * @param table The table to check 1659 * @param columnName The column name to look for 1660 * @return true if the column exists in the table's DDL, false if not found or no DDL available 1661 */ 1662 public static boolean isColumnInTableDdl(TTable table, String columnName) { 1663 if (table == null || columnName == null || columnName.isEmpty()) { 1664 return false; 1665 } 1666 1667 // Check if the table has column definitions (from CREATE TABLE DDL) 1668 gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions(); 1669 if (columnDefs != null && columnDefs.size() > 0) { 1670 for (int i = 0; i < columnDefs.size(); i++) { 1671 gudusoft.gsqlparser.nodes.TColumnDefinition colDef = columnDefs.getColumn(i); 1672 if (colDef != null && colDef.getColumnName() != null) { 1673 String defColName = colDef.getColumnName().toString(); 1674 if (defColName != null && defColName.equalsIgnoreCase(columnName)) { 1675 return true; 1676 } 1677 } 1678 } 1679 // DDL exists but column not found 1680 return false; 1681 } 1682 1683 // No DDL available - return false (cannot verify) 1684 return false; 1685 } 1686 1687 /** 1688 * Check if a table has DDL metadata available (from CREATE TABLE in same script). 1689 * 1690 * @param table The table to check 1691 * @return true if DDL metadata is available for this table 1692 */ 1693 public static boolean hasTableDdl(TTable table) { 1694 if (table == null) { 1695 return false; 1696 } 1697 gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions(); 1698 return columnDefs != null && columnDefs.size() > 0; 1699 } 1700 1701 /** 1702 * Check DDL verification status for a candidate table. 1703 * 1704 * <p>Returns a tri-state result:</p> 1705 * <ul> 1706 * <li>1 = Column exists in table's DDL</li> 1707 * <li>0 = Column NOT found in table's DDL (DDL available but column missing)</li> 1708 * <li>-1 = Cannot verify (no DDL available for this table)</li> 1709 * </ul> 1710 * 1711 * @param table The candidate table to check 1712 * @param columnName The column name to verify 1713 * @return DDL verification status: 1 (exists), 0 (not found), -1 (no DDL) 1714 */ 1715 public static int getDdlVerificationStatus(TTable table, String columnName) { 1716 if (table == null || columnName == null) { 1717 return -1; 1718 } 1719 1720 gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions(); 1721 if (columnDefs == null || columnDefs.size() == 0) { 1722 return -1; // No DDL available 1723 } 1724 1725 // DDL available - check if column exists 1726 for (int i = 0; i < columnDefs.size(); i++) { 1727 gudusoft.gsqlparser.nodes.TColumnDefinition colDef = columnDefs.getColumn(i); 1728 if (colDef != null && colDef.getColumnName() != null) { 1729 String defColName = colDef.getColumnName().toString(); 1730 if (defColName != null && defColName.equalsIgnoreCase(columnName)) { 1731 return 1; // Column exists in DDL 1732 } 1733 } 1734 } 1735 1736 return 0; // DDL exists but column not found 1737 } 1738 1739 /** 1740 * Get DDL verification status for all candidate tables. 1741 * 1742 * <p>Returns a map from each candidate table to its DDL verification status:</p> 1743 * <ul> 1744 * <li>1 = Column exists in table's DDL</li> 1745 * <li>0 = Column NOT found in table's DDL</li> 1746 * <li>-1 = Cannot verify (no DDL available)</li> 1747 * </ul> 1748 * 1749 * @return Map of candidate tables to their DDL verification status, or empty map if no candidates 1750 */ 1751 public java.util.Map<TTable, Integer> getCandidateTableDdlStatus() { 1752 java.util.Map<TTable, Integer> result = new java.util.LinkedHashMap<>(); 1753 if (candidateTables == null || candidateTables.isEmpty() || exposedName == null) { 1754 return result; 1755 } 1756 1757 for (TTable candidate : candidateTables) { 1758 int status = getDdlVerificationStatus(candidate, exposedName); 1759 result.put(candidate, status); 1760 } 1761 return result; 1762 } 1763}