001package gudusoft.gsqlparser.resolver2.model; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.EExpressionType; 005import gudusoft.gsqlparser.nodes.TExpression; 006import gudusoft.gsqlparser.nodes.TObjectName; 007import gudusoft.gsqlparser.nodes.TParseTreeNode; 008import gudusoft.gsqlparser.nodes.TResultColumn; 009import gudusoft.gsqlparser.nodes.TTable; 010import gudusoft.gsqlparser.resolver2.inference.EvidenceType; 011import gudusoft.gsqlparser.resolver2.matcher.INameMatcher; 012import gudusoft.gsqlparser.resolver2.matcher.VendorNameMatcher; 013import gudusoft.gsqlparser.resolver2.namespace.AbstractNamespace; 014import gudusoft.gsqlparser.resolver2.namespace.INamespace; 015import gudusoft.gsqlparser.resolver2.namespace.SubqueryNamespace; 016import gudusoft.gsqlparser.resolver2.namespace.CTENamespace; 017import gudusoft.gsqlparser.resolver2.namespace.UnionNamespace; 018import gudusoft.gsqlparser.sqlenv.ESQLDataObjectType; 019import gudusoft.gsqlparser.sqlenv.IdentifierService; 020import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 021 022import java.util.Collections; 023import java.util.HashSet; 024import java.util.List; 025 026/** 027 * Represents the source of a column reference. 028 * Tracks where a column comes from, including intermediate transformations 029 * through subqueries and CTEs. 030 * 031 * Design principles: 032 * 1. Immutable - once created, cannot be modified 033 * 2. Recursive - can trace back through subquery/CTE layers 034 * 3. Confidence-scored - supports evidence-based inference 035 */ 036public class ColumnSource { 037 /** The namespace where this column is exposed (e.g., subquery, table) */ 038 private final INamespace sourceNamespace; 039 040 /** The name by which this column is exposed in the namespace */ 041 private final String exposedName; 042 043 /** The AST node where this column is defined (TResultColumn, TTableColumn, etc.) 
*/ 044 private final TParseTreeNode definitionNode; 045 046 /** Location information for the definition */ 047 private final SourceLocation definitionLocation; 048 049 /** 050 * Confidence score [0.0, 1.0]: 051 * - 1.0: Definite (from metadata or explicit definition) 052 * - 0.7-0.9: High confidence inference (strong evidence) 053 * - 0.5-0.7: Medium confidence inference (some evidence) 054 * - 0.0-0.5: Low confidence guess 055 */ 056 private final double confidence; 057 058 /** 059 * Evidence that supports this resolution. 060 * Used for debugging and explaining inference decisions. 061 * 062 * @deprecated Use {@link #evidenceDetail} instead. This field is kept for backward 063 * compatibility and will be derived from evidenceDetail if not explicitly set. 064 */ 065 private final String evidence; 066 067 /** 068 * Structured evidence detail for this resolution. 069 * Provides type-safe evidence with confidence weight and source traceability. 070 * This is the preferred way to access resolution evidence. 071 * 072 * @see ResolutionEvidence 073 */ 074 private final ResolutionEvidence evidenceDetail; 075 076 /** 077 * Override table for traced columns. 078 * When set, getFinalTable() returns this instead of namespace's table. 079 */ 080 private final TTable overrideTable; 081 082 /** 083 * Candidate tables for ambiguous columns. 084 * When a column could come from multiple tables (e.g., SELECT * FROM t1, t2), 085 * this list contains all possible source tables so end users can access them. 086 */ 087 private final List<TTable> candidateTables; 088 089 /** 090 * Field path for deep/record field access (e.g., struct.field.subfield). 091 * 092 * <p>When a column reference includes field access beyond the base column, 093 * this captures the field path. 
For example, in {@code customer.address.city},
     * if base column is {@code customer}, fieldPath contains {@code ["address", "city"]}.</p>
     *
     * <p>This field is null or empty for regular column references without field access.</p>
     *
     * @see FieldPath
     */
    private final FieldPath fieldPath;

    /**
     * Creates a column source from a legacy evidence string.
     * Delegates with no override table and no candidate tables.
     *
     * @param sourceNamespace The namespace where this column is exposed
     * @param exposedName     The name by which this column is exposed
     * @param definitionNode  The AST node where this column is defined
     * @param confidence      Confidence score [0.0, 1.0]
     * @param evidence        Evidence string for this resolution
     */
    public ColumnSource(INamespace sourceNamespace,
                        String exposedName,
                        TParseTreeNode definitionNode,
                        double confidence,
                        String evidence) {
        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, null, null);
    }

    /**
     * Creates a column source with an override table.
     * Delegates with no candidate tables.
     *
     * @param sourceNamespace The namespace where this column is exposed
     * @param exposedName     The name by which this column is exposed
     * @param definitionNode  The AST node where this column is defined
     * @param confidence      Confidence score [0.0, 1.0]
     * @param evidence        Evidence string for this resolution
     * @param overrideTable   Override table for traced columns
     */
    public ColumnSource(INamespace sourceNamespace,
                        String exposedName,
                        TParseTreeNode definitionNode,
                        double confidence,
                        String evidence,
                        TTable overrideTable) {
        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, null);
    }

    /**
     * Creates a column source with an override table and candidate tables.
     * Delegates with no structured evidence detail and no field path.
     *
     * @param sourceNamespace The namespace where this column is exposed
     * @param exposedName     The name by which this column is exposed
     * @param definitionNode  The AST node where this column is defined
     * @param confidence      Confidence score [0.0, 1.0]
     * @param evidence        Evidence string for this resolution
     * @param overrideTable   Override table for traced columns
     * @param candidateTables Candidate tables for ambiguous columns
     */
    public ColumnSource(INamespace sourceNamespace,
                        String exposedName,
                        TParseTreeNode definitionNode,
                        double confidence,
                        String evidence,
                        TTable overrideTable,
                        List<TTable> candidateTables) {
        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, candidateTables, null, null);
    }

    /**
     * Constructor with every field except the field path, including ResolutionEvidence.
     * Delegates to the nine-argument constructor with a null field path.
     *
     * @param sourceNamespace The namespace where this column is exposed
     * @param exposedName     The name by which this column is exposed
     * @param definitionNode  The AST node where this column is defined
     * @param confidence      Confidence score [0.0, 1.0]
     * @param evidence        Evidence string for this resolution
     * @param overrideTable   Override table for traced columns
     * @param candidateTables Candidate tables for ambiguous columns
     * @param evidenceDetail  Structured evidence detail
     */
    public ColumnSource(INamespace sourceNamespace,
                        String exposedName,
                        TParseTreeNode definitionNode,
                        double confidence,
                        String evidence,
                        TTable overrideTable,
                        List<TTable> candidateTables,
                        ResolutionEvidence evidenceDetail) {
        this(sourceNamespace, exposedName, definitionNode, confidence, evidence, overrideTable, candidateTables, evidenceDetail, null);
    }

    /**
     * Full constructor with all fields including ResolutionEvidence and FieldPath.
145 * 146 * @param sourceNamespace The namespace where this column is exposed 147 * @param exposedName The name by which this column is exposed 148 * @param definitionNode The AST node where this column is defined 149 * @param confidence Confidence score [0.0, 1.0] 150 * @param evidence Evidence string for this resolution 151 * @param overrideTable Override table for traced columns 152 * @param candidateTables Candidate tables for ambiguous columns 153 * @param evidenceDetail Structured evidence detail 154 * @param fieldPath Field path for deep/record field access 155 */ 156 public ColumnSource(INamespace sourceNamespace, 157 String exposedName, 158 TParseTreeNode definitionNode, 159 double confidence, 160 String evidence, 161 TTable overrideTable, 162 List<TTable> candidateTables, 163 ResolutionEvidence evidenceDetail, 164 FieldPath fieldPath) { 165 this.sourceNamespace = sourceNamespace; 166 this.exposedName = exposedName; 167 this.definitionNode = definitionNode; 168 this.definitionLocation = definitionNode != null 169 ? new SourceLocation(definitionNode) 170 : null; 171 this.confidence = Math.max(0.0, Math.min(1.0, confidence)); 172 this.evidence = evidence; 173 this.overrideTable = overrideTable; 174 this.candidateTables = candidateTables != null ? Collections.unmodifiableList(candidateTables) : null; 175 this.fieldPath = fieldPath; 176 // If evidenceDetail not provided, create from legacy evidence 177 if (evidenceDetail != null) { 178 this.evidenceDetail = evidenceDetail; 179 } else if (evidence != null) { 180 this.evidenceDetail = ResolutionEvidence.fromLegacyEvidence(evidence, confidence, definitionNode); 181 } else { 182 this.evidenceDetail = null; 183 } 184 } 185 186 /** 187 * Constructor with ResolutionEvidence (preferred for new code). 
188 */ 189 public ColumnSource(INamespace sourceNamespace, 190 String exposedName, 191 TParseTreeNode definitionNode, 192 ResolutionEvidence evidenceDetail) { 193 this(sourceNamespace, exposedName, definitionNode, 194 evidenceDetail != null ? evidenceDetail.getWeight() : 1.0, 195 evidenceDetail != null ? evidenceDetail.toLegacyEvidence() : "metadata", 196 null, null, evidenceDetail); 197 } 198 199 /** 200 * Constructor with ResolutionEvidence and override table. 201 */ 202 public ColumnSource(INamespace sourceNamespace, 203 String exposedName, 204 TParseTreeNode definitionNode, 205 ResolutionEvidence evidenceDetail, 206 TTable overrideTable) { 207 this(sourceNamespace, exposedName, definitionNode, 208 evidenceDetail != null ? evidenceDetail.getWeight() : 1.0, 209 evidenceDetail != null ? evidenceDetail.toLegacyEvidence() : "metadata", 210 overrideTable, null, evidenceDetail); 211 } 212 213 /** 214 * Constructor for definite matches (confidence = 1.0) 215 */ 216 public ColumnSource(INamespace sourceNamespace, 217 String exposedName, 218 TParseTreeNode definitionNode) { 219 this(sourceNamespace, exposedName, definitionNode, 1.0, "metadata"); 220 } 221 222 public INamespace getSourceNamespace() { 223 return sourceNamespace; 224 } 225 226 public String getExposedName() { 227 return exposedName; 228 } 229 230 public TParseTreeNode getDefinitionNode() { 231 return definitionNode; 232 } 233 234 public SourceLocation getDefinitionLocation() { 235 return definitionLocation; 236 } 237 238 public double getConfidence() { 239 return confidence; 240 } 241 242 public String getEvidence() { 243 return evidence; 244 } 245 246 /** 247 * Get the structured evidence detail for this resolution. 
248 * 249 * <p>This is the preferred way to access resolution evidence as it provides: 250 * <ul> 251 * <li>Type-safe evidence type (enum)</li> 252 * <li>Confidence weight with clear semantics</li> 253 * <li>Source location for traceability</li> 254 * <li>Human-readable messages</li> 255 * </ul> 256 * 257 * @return The structured evidence detail, or null if not available 258 */ 259 public ResolutionEvidence getEvidenceDetail() { 260 return evidenceDetail; 261 } 262 263 /** 264 * Get the evidence type from the structured evidence detail. 265 * Convenience method for common use cases. 266 * 267 * @return The evidence type, or null if no evidence detail 268 */ 269 public EvidenceType getEvidenceType() { 270 return evidenceDetail != null ? evidenceDetail.getType() : null; 271 } 272 273 /** 274 * Check if this resolution has definite evidence (not inferred). 275 * Definite evidence comes from DDL, metadata, or explicit definitions. 276 * 277 * @return true if evidence is definite 278 */ 279 public boolean hasDefiniteEvidence() { 280 if (evidenceDetail != null) { 281 return evidenceDetail.isDefinite(); 282 } 283 // Fallback: check legacy evidence and confidence 284 if (confidence >= 1.0) { 285 return true; 286 } 287 if (evidence != null) { 288 String lower = evidence.toLowerCase(); 289 return lower.contains("metadata") || lower.contains("ddl") || 290 lower.contains("explicit") || lower.contains("insert_column"); 291 } 292 return false; 293 } 294 295 /** 296 * Get the <b>final</b> physical table this column originates from after tracing 297 * through all subqueries and CTEs. 298 * 299 * <h3>Semantic Difference: getFinalTable() vs TObjectName.getSourceTable()</h3> 300 * <ul> 301 * <li><b>getFinalTable()</b> (this method): The final physical table after 302 * recursively tracing through all subqueries and CTEs. Use this for data lineage.</li> 303 * <li><b>TObjectName.getSourceTable()</b>: The immediate source in the current scope. 
304 * For a column from a subquery, this points to the subquery's TTable itself.</li> 305 * </ul> 306 * 307 * <h3>Example</h3> 308 * <pre>{@code 309 * SELECT title FROM (SELECT * FROM books) sub 310 * 311 * For the 'title' column in outer SELECT: 312 * - TObjectName.getSourceTable() → TTable for subquery 'sub' (immediate source) 313 * - ColumnSource.getFinalTable() → TTable for 'books' (final physical table) 314 * }</pre> 315 * 316 * <p>For calculated columns in subqueries (expressions like {@code START_DT - x AS alias}), 317 * this returns null because such calculated columns don't originate from a physical 318 * table - they are derived values computed in the subquery.</p> 319 * 320 * <p>For aliased columns in subqueries (e.g., {@code SELECT t.id AS col1 FROM my_table t}), 321 * this traces through the alias to find the physical table, because the data still 322 * originates from the physical table even though the column has been renamed.</p> 323 * 324 * <p>Note: For CTEs, calculated columns ARE the CTE's own columns, so they trace 325 * to the CTE itself (handled by CTENamespace.getFinalTable()).</p> 326 * 327 * @return The physical table, or null if unable to determine or if calculated in subquery 328 * @see gudusoft.gsqlparser.nodes.TObjectName#getSourceTable() 329 */ 330 public TTable getFinalTable() { 331 if (sourceNamespace == null && overrideTable == null) { 332 return null; 333 } 334 335 // For SubqueryNamespace: calculated columns should NOT trace to base table 336 // They are derived values that don't exist in the underlying physical table 337 // Example: SELECT *, expr AS alias FROM table - alias is calculated, not from table 338 // 339 // IMPORTANT: Check BEFORE overrideTable to prevent alias/calculated columns 340 // from being traced to base tables even when overrideTable is explicitly set 341 if (sourceNamespace instanceof SubqueryNamespace && isCalculatedColumn()) { 342 return null; 343 } 344 345 // For CTENamespace: calculated columns ARE the 
CTE's own columns 346 // They should trace to the CTE itself (referencing table), NOT to underlying base tables 347 // Example: WITH cte AS (SELECT SUM(x) AS total FROM t) SELECT total FROM cte 348 // The 'total' column traces to 'cte', not to 't' 349 if (sourceNamespace instanceof CTENamespace && isCalculatedColumn()) { 350 return ((CTENamespace) sourceNamespace).getReferencingTable(); 351 } 352 353 // For SubqueryNamespace: column aliases - trace through to find the source table. 354 // The alias changes the column name but the data still originates from a physical table. 355 // Example: SELECT t.id AS col1 FROM my_table t - col1's data comes from my_table 356 if (sourceNamespace instanceof SubqueryNamespace && isColumnAlias()) { 357 return traceColumnAliasThroughSubquery((SubqueryNamespace) sourceNamespace); 358 } 359 360 // For CTENamespace: column aliases ARE the CTE's own columns 361 // They should trace to the CTE itself, NOT to underlying base tables 362 // Example: WITH cte AS (SELECT x AS y FROM t) SELECT y FROM cte 363 // The 'y' column traces to 'cte', not to 't' 364 if (sourceNamespace instanceof CTENamespace && isColumnAlias()) { 365 return ((CTENamespace) sourceNamespace).getReferencingTable(); 366 } 367 368 // For CTENamespace: explicit column names ARE the CTE's own columns 369 // They should trace to the CTE itself, NOT to underlying base tables 370 // Example: WITH cte(c1, c2) AS (SELECT id, name FROM t) SELECT c1 FROM cte 371 // The 'c1' column traces to 'cte', not to 't' (because 'c1' doesn't exist in 't') 372 if (sourceNamespace instanceof CTENamespace && isCTEExplicitColumn()) { 373 return ((CTENamespace) sourceNamespace).getReferencingTable(); 374 } 375 376 // For SubqueryNamespace: passthrough columns that reference calculated columns should NOT trace 377 // IMPORTANT: This check must come BEFORE isPassthroughToAlias() because 378 // isPassthroughToAlias() returns true for BOTH alias and calculated passthroughs. 
379 // Example: SELECT kko_lfz_9 FROM (SELECT CASE...END AS kko_lfz_9 FROM t) subq 380 // The outer kko_lfz_9 references a calculated column in the subquery 381 if (sourceNamespace instanceof SubqueryNamespace && isPassthroughToCalculatedInSubquery()) { 382 return null; 383 } 384 385 // For SubqueryNamespace: passthrough columns that reference aliases - trace through 386 // to find the source table. The data still originates from a physical table. 387 // Example: SELECT stat_typ FROM (SELECT stat_typ = stellplatz_typ FROM t) AS b 388 // The stat_typ in outer query references b.stat_typ → traces to t 389 if (sourceNamespace instanceof SubqueryNamespace && isPassthroughToAlias()) { 390 return traceColumnAliasThroughSubquery((SubqueryNamespace) sourceNamespace); 391 } 392 393 // For CTENamespace: passthrough columns that reference calculated subquery columns should NOT trace 394 // Example: WITH DataCTE AS (SELECT subq.calc_col FROM (SELECT CASE...END AS calc_col FROM t) subq) 395 // The CTE column calc_col references a calculated column in the subquery 396 if (sourceNamespace instanceof CTENamespace && isPassthroughToCalculatedInCTE()) { 397 return null; 398 } 399 400 // For CTE explicit column + star pattern: c1/c2/c3 trace to the star, NOT through the star 401 // Example: WITH cte(c1, c2, c3) AS (SELECT * FROM Employees) 402 // Without metadata, c1 traces to Employees.* (the star), not Employees.c1 (which doesn't exist) 403 // The evidence "cte_explicit_column_via_star".equals(evidence) indicates this pattern 404 if ("cte_explicit_column_via_star".equals(evidence)) { 405 return null; 406 } 407 408 // For inferred columns traced through SELECT * across a CTE/subquery chain: 409 // if the exposed name is defined deeper in the chain as a renamed alias of a 410 // simple column (e.g. {@code SELECT t.col AS bug2}), the alias name does NOT 411 // exist as a column in the underlying physical table. 
Falling through to the 412 // generic tracing would link the alias name to the base table - producing 413 // bogus entries in TTable.getLinkedColumns(). 414 if (isInferredAliasThroughChain()) { 415 if (sourceNamespace instanceof CTENamespace) { 416 return ((CTENamespace) sourceNamespace).getReferencingTable(); 417 } 418 return null; 419 } 420 421 // If an override table is set (e.g., for traced columns), use it 422 if (overrideTable != null) { 423 return overrideTable; 424 } 425 426 if (sourceNamespace == null) { 427 return null; 428 } 429 430 // For UnionNamespace: UNION columns don't belong to any specific physical table. 431 // They're a combination of multiple branches. UnionNamespace.getFinalTable() returns 432 // the first branch's table which is incorrect for tracking column origins. 433 if (sourceNamespace instanceof UnionNamespace) { 434 return null; 435 } 436 437 // For SubqueryNamespace without override table: if the subquery has multiple tables 438 // AND no qualified star to identify the source, we can't determine which table 439 // the column comes from. Returning the first table would be incorrect. 440 // Example: FROM CDS_H_PARTNER PAR, (SELECT kategorie ... FROM CDS_H_KUNDEN_OBJEKT) subq 441 // But if there's a qualified star like "ta.*", that identifies the source table. 442 // 443 // IMPORTANT: For Teradata, implicit lateral derived tables (auto-added tables when 444 // a column references an undeclared table in WHERE clause) should be excluded from 445 // the multiple-table count. These are syntactic sugar and shouldn't affect column 446 // resolution to the actual source table. 
447 if (sourceNamespace instanceof SubqueryNamespace && overrideTable == null) { 448 SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace; 449 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = subNs.getSubquery(); 450 if (subquery != null && subquery.tables != null) { 451 // Count only real tables (excluding implicit lateral derived tables) 452 int realTableCount = countRealTables(subquery.tables); 453 if (realTableCount > 1) { 454 // Check if there's a qualified star that can identify the source 455 if (!hasQualifiedStar(subquery)) { 456 return null; 457 } 458 } 459 } 460 } 461 462 // For CTENamespace with multiple tables (e.g., JOIN): trace the specific column 463 // to its correct source table using the definitionNode 464 // Example: WITH cte AS (SELECT m.album_id, b.band_name FROM albums m JOIN bands b ...) 465 // When tracing 'band_name', we need to find it comes from 'b' (bands), not 'm' (albums) 466 if (sourceNamespace instanceof CTENamespace) { 467 CTENamespace cteNs = (CTENamespace) sourceNamespace; 468 TTable tracedTable = null; 469 470 if (definitionNode instanceof TResultColumn) { 471 // Direct case: definitionNode is a TResultColumn from the CTE's SELECT list 472 tracedTable = traceColumnThroughCTE(cteNs, (TResultColumn) definitionNode); 473 } else { 474 // Indirect case: The column might be traced through a star column 475 // Try to find the column by name in the CTE chain 476 tracedTable = traceColumnByNameThroughCTE(cteNs, exposedName); 477 } 478 479 if (tracedTable != null) { 480 return tracedTable; 481 } 482 483 // For CTEs with multiple tables, if tracing failed (unqualified column), 484 // return null instead of the first table to avoid incorrect lineage. 485 // Example: WITH cte AS (SELECT musicians.id, musician_name, music_bands.band_name 486 // FROM musicians JOIN ... 
JOIN music_bands) 487 // The unqualified 'musician_name' cannot be traced to any specific table 488 // without metadata, so we should NOT guess and pick the first table. 489 TSelectSqlStatement cteSelect = cteNs.getSelectStatement(); 490 if (cteSelect != null && cteSelect.tables != null) { 491 int tableCount = countRealTables(cteSelect.tables); 492 if (tableCount > 1) { 493 // Cannot determine which table - don't guess 494 return null; 495 } 496 } 497 } 498 499 return sourceNamespace.getFinalTable(); 500 } 501 502 /** 503 * Get the original column name in the physical table when this column is an alias. 504 * 505 * <p>When a column is aliased in a subquery (e.g., {@code SELECT t.id AS col1}), 506 * the exposed name is {@code col1} but the original column in the physical table 507 * is {@code id}. This method returns {@code id} so callers can pair the correct 508 * column name with the physical table returned by {@link #getFinalTable()}.</p> 509 * 510 * <p>For multi-level aliases (e.g., {@code SELECT ADCS_MSISDN FROM (SELECT MSISDN AS ADCS_MSISDN FROM t) sub}), 511 * this recursively traces through all levels to find the original column name ({@code MSISDN}).</p> 512 * 513 * @return The original column name if this is an alias, or null if not an alias 514 * or unable to determine 515 */ 516 public String getFinalColumnName() { 517 return getFinalColumnNameInternal(0); 518 } 519 520 /** 521 * Internal recursive implementation of getFinalColumnName with depth limit. 
522 */ 523 private String getFinalColumnNameInternal(int depth) { 524 if (depth > 10) return null; // safety limit for deeply nested aliases 525 526 if (!isColumnAlias() && !isPassthroughToAlias()) { 527 return null; 528 } 529 530 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 531 return null; 532 } 533 534 TResultColumn rc = (TResultColumn) definitionNode; 535 TExpression expr = rc.getExpr(); 536 if (expr == null) { 537 return null; 538 } 539 540 TObjectName colRef = null; 541 if (expr.getExpressionType() == EExpressionType.simple_object_name_t) { 542 colRef = expr.getObjectOperand(); 543 } else if (expr.getExpressionType() == EExpressionType.sqlserver_proprietary_column_alias_t) { 544 TExpression rightExpr = expr.getRightOperand(); 545 if (rightExpr != null && rightExpr.getExpressionType() == EExpressionType.simple_object_name_t) { 546 colRef = rightExpr.getObjectOperand(); 547 } 548 } 549 550 if (colRef != null) { 551 // Check if this inner column reference itself has a ColumnSource that's also an alias. 552 // If so, recursively trace to get the deepest original column name. 553 ColumnSource innerSource = colRef.getColumnSource(); 554 if (innerSource != null && (innerSource.isColumnAlias() || innerSource.isPassthroughToAlias())) { 555 String deeperName = innerSource.getFinalColumnNameInternal(depth + 1); 556 if (deeperName != null) { 557 return deeperName; 558 } 559 } 560 return colRef.getColumnNameOnly(); 561 } 562 return null; 563 } 564 565 /** 566 * Trace a column by name through a CTE to find its correct source table. 567 * This handles the case when the column is traced through star columns 568 * and we don't have the direct TResultColumn definition. 
*
     * @param cteNs The CTENamespace to trace through
     * @param columnName The name of the column to find
     * @return The correct source table, or null if unable to determine
     */
    // NOTE(review): the Javadoc block ending just above describes
    // traceColumnByNameThroughCTE, which is defined after the S2 helpers below,
    // so it is no longer attached to that method.

    // ===== S2: vendor-aware identifier matching helpers =====
    // Replace raw equalsIgnoreCase compares so quoted identifiers and
    // vendor-specific case rules (BigQuery: tables sensitive, columns
    // insensitive; Oracle/Postgres quoted: sensitive; etc.) are honored.
    // The 2-arg INameMatcher.matches(...) defaults to dotColumn semantics
    // inside VendorNameMatcher, so we route through VendorNameMatcher
    // explicitly with the right ESQLDataObjectType when possible.

    /**
     * Slice S2: alias-equality check (subquery alias / table alias).
     * Routes through dotTable on a VendorNameMatcher so BigQuery's
     * table-sensitive rule is honored, falling back to the namespace's
     * matcher for vendor-agnostic test scopes.
     *
     * @param stored the alias taken from the parse tree
     * @param input  the qualifier being looked up
     * @return the result of {@code matchesAs(stored, input, ESQLDataObjectType.dotTable)}
     */
    private boolean aliasMatches(String stored, String input) {
        return matchesAs(stored, input, ESQLDataObjectType.dotTable);
    }

    /**
     * Slice S2: bare table-name equality check.
     * Same routing as {@link #aliasMatches} (dotTable). Kept as a
     * separate name so call sites read self-documentingly.
     *
     * @param stored the table name taken from the parse tree
     * @param input  the qualifier being looked up
     * @return the result of {@code matchesAs(stored, input, ESQLDataObjectType.dotTable)}
     */
    private boolean tableMatches(String stored, String input) {
        return matchesAs(stored, input, ESQLDataObjectType.dotTable);
    }

    /**
     * Slice S2: column-name equality check.
     * Routes through dotColumn (the VendorNameMatcher default), but
     * called explicitly so future audits cannot mistake call sites
     * for table compares.
     *
     * @param stored the column name taken from the parse tree
     * @param input  the column name being looked up
     * @return the result of {@code matchesAs(stored, input, ESQLDataObjectType.dotColumn)}
     */
    private boolean columnMatches(String stored, String input) {
        return matchesAs(stored, input, ESQLDataObjectType.dotColumn);
    }

    /**
     * Look up the namespace's name matcher and route the compare through
     * it with the supplied {@link ESQLDataObjectType}.
Falls back to 614 * {@link String#equalsIgnoreCase} when the namespace is missing or is 615 * not an {@link AbstractNamespace} (synthetic / unit-test scopes), to 616 * preserve current vendor-agnostic behaviour for those callers. 617 */ 618 private boolean matchesAs(String a, String b, ESQLDataObjectType objectType) { 619 if (a == null || b == null) { 620 return a == b; 621 } 622 INameMatcher matcher = sourceNamespace instanceof AbstractNamespace 623 ? ((AbstractNamespace) sourceNamespace).getNameMatcher() 624 : null; 625 if (matcher instanceof VendorNameMatcher) { 626 return ((VendorNameMatcher) matcher).matches(a, b, objectType); 627 } 628 if (matcher != null) { 629 return matcher.matches(a, b); 630 } 631 return a.equalsIgnoreCase(b); 632 } 633 634 private TTable traceColumnByNameThroughCTE(CTENamespace cteNs, String columnName) { 635 if (columnName == null || columnName.isEmpty()) { 636 return null; 637 } 638 639 // Get the CTE's SELECT statement 640 TSelectSqlStatement cteSelect = cteNs.getSelectStatement(); 641 if (cteSelect == null) { 642 return null; 643 } 644 645 // First, check if this CTE has explicit columns matching the name 646 TTable result = findColumnInSelectList(cteSelect, columnName); 647 if (result != null) { 648 return result; 649 } 650 651 // If not found directly, check if this CTE uses SELECT * from another CTE 652 if (cteSelect.tables != null) { 653 for (int i = 0; i < cteSelect.tables.size(); i++) { 654 TTable table = cteSelect.tables.getTable(i); 655 if (table == null) continue; 656 657 // If it references another CTE, trace through it 658 if (table.isCTEName() && table.getCTE() != null) { 659 gudusoft.gsqlparser.nodes.TCTE underlyingCte = table.getCTE(); 660 TSelectSqlStatement underlyingSelect = underlyingCte.getSubquery(); 661 if (underlyingSelect != null) { 662 result = findColumnInSelectList(underlyingSelect, columnName); 663 if (result != null) { 664 return result; 665 } 666 } 667 } 668 } 669 } 670 671 return null; 672 } 673 674 /** 
675 * Trace a column through a CTE to find its correct source table. 676 * This handles CTEs with JOINs where columns come from different tables, 677 * including CTEs with star columns that reference other CTEs. 678 * 679 * @param cteNs The CTENamespace 680 * @param resultColumn The TResultColumn from the CTE's SELECT list 681 * @return The correct source table, or null if unable to determine 682 */ 683 private TTable traceColumnThroughCTE(CTENamespace cteNs, TResultColumn resultColumn) { 684 if (resultColumn == null || resultColumn.getExpr() == null) { 685 return null; 686 } 687 688 TExpression expr = resultColumn.getExpr(); 689 690 // Check if the expression is a star column (e.g., SELECT * FROM other_cte) 691 // In this case, we need to trace through to the underlying CTE 692 if (expr.getExpressionType() == EExpressionType.simple_object_name_t) { 693 TObjectName colRef = expr.getObjectOperand(); 694 if (colRef != null && "*".equals(colRef.getColumnNameOnly())) { 695 // This is a star column - trace through to find the actual column 696 return traceColumnThroughStarInCTE(cteNs, exposedName); 697 } 698 } 699 700 // Check if the expression is a simple column reference 701 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 702 return null; 703 } 704 705 TObjectName colRef = expr.getObjectOperand(); 706 if (colRef == null) { 707 return null; 708 } 709 710 // Check if the column has a table qualifier (e.g., "b.band_name") 711 String tableQualifier = colRef.getTableString(); 712 if (tableQualifier == null || tableQualifier.isEmpty()) { 713 // No qualifier - can't determine which table 714 return null; 715 } 716 717 // Get the CTE's subquery to find the table with matching alias 718 TSelectSqlStatement cteSubquery = cteNs.getSelectStatement(); 719 if (cteSubquery == null || cteSubquery.tables == null) { 720 return null; 721 } 722 723 // Search for the table with matching alias or name (S2: vendor-aware compares) 724 for (int i = 0; i < 
cteSubquery.tables.size(); i++) { 725 TTable table = cteSubquery.tables.getTable(i); 726 if (table == null) continue; 727 728 // Check alias match 729 String alias = table.getAliasName(); 730 if (alias != null && aliasMatches(alias, tableQualifier)) { 731 // Found the table - now trace to its final physical table if needed 732 return traceToPhysicalTable(table); 733 } 734 735 // Check table name match (for unaliased tables) 736 String tableName = table.getTableName() != null ? table.getTableName().toString() : null; 737 if (tableName != null && tableMatches(tableName, tableQualifier)) { 738 return traceToPhysicalTable(table); 739 } 740 } 741 742 return null; 743 } 744 745 /** 746 * Trace a specific column through a CTE that uses SELECT *. 747 * This finds the underlying CTE that defines the column and traces it to the correct table. 748 * 749 * @param cteNs The CTE namespace with SELECT * 750 * @param columnName The name of the column to trace 751 * @return The correct source table, or null if unable to determine 752 */ 753 private TTable traceColumnThroughStarInCTE(CTENamespace cteNs, String columnName) { 754 if (columnName == null || columnName.isEmpty()) { 755 return null; 756 } 757 758 TSelectSqlStatement cteSubquery = cteNs.getSelectStatement(); 759 if (cteSubquery == null || cteSubquery.tables == null) { 760 return null; 761 } 762 763 // Find the underlying CTE or table that the star column references 764 for (int i = 0; i < cteSubquery.tables.size(); i++) { 765 TTable table = cteSubquery.tables.getTable(i); 766 if (table == null) continue; 767 768 // If it's a CTE reference, look for the column in that CTE 769 if (table.isCTEName() && table.getCTE() != null) { 770 gudusoft.gsqlparser.nodes.TCTE underlyingCte = table.getCTE(); 771 TSelectSqlStatement underlyingSubquery = underlyingCte.getSubquery(); 772 if (underlyingSubquery != null) { 773 // Look for the column in the underlying CTE's SELECT list 774 TTable tracedTable = 
findColumnInSelectList(underlyingSubquery, columnName); 775 if (tracedTable != null) { 776 return tracedTable; 777 } 778 } 779 } 780 } 781 782 return null; 783 } 784 785 /** 786 * Find a column by name in a SELECT list and trace it to its source table. 787 * 788 * @param selectStmt The SELECT statement to search 789 * @param columnName The column name to find 790 * @return The source table for the column, or null if not found 791 */ 792 private TTable findColumnInSelectList(TSelectSqlStatement selectStmt, String columnName) { 793 if (selectStmt == null || selectStmt.getResultColumnList() == null) { 794 return null; 795 } 796 797 gudusoft.gsqlparser.nodes.TResultColumnList resultList = selectStmt.getResultColumnList(); 798 for (int i = 0; i < resultList.size(); i++) { 799 TResultColumn rc = resultList.getResultColumn(i); 800 if (rc == null) continue; 801 802 // Get the exposed name (alias or column name) 803 String exposedColName = rc.getAliasClause() != null 804 ? rc.getAliasClause().toString() 805 : (rc.getExpr() != null && rc.getExpr().getObjectOperand() != null 806 ? 
rc.getExpr().getObjectOperand().getColumnNameOnly() 807 : null); 808 809 if (exposedColName != null && columnMatches(exposedColName, columnName)) { 810 // Found the column - trace it to its source table (S2: vendor-aware compares) 811 TExpression expr = rc.getExpr(); 812 if (expr != null && expr.getExpressionType() == EExpressionType.simple_object_name_t) { 813 TObjectName colRef = expr.getObjectOperand(); 814 if (colRef != null) { 815 String tableQualifier = colRef.getTableString(); 816 if (tableQualifier != null && !tableQualifier.isEmpty()) { 817 // Find the table with this qualifier in the FROM clause 818 if (selectStmt.tables != null) { 819 for (int j = 0; j < selectStmt.tables.size(); j++) { 820 TTable table = selectStmt.tables.getTable(j); 821 if (table == null) continue; 822 823 String alias = table.getAliasName(); 824 if (alias != null && aliasMatches(alias, tableQualifier)) { 825 return traceToPhysicalTable(table); 826 } 827 828 String tableName = table.getTableName() != null 829 ? table.getTableName().toString() : null; 830 if (tableName != null && tableMatches(tableName, tableQualifier)) { 831 return traceToPhysicalTable(table); 832 } 833 } 834 } 835 } 836 } 837 } 838 } 839 } 840 841 return null; 842 } 843 844 /** 845 * Trace a table to its underlying physical table. 846 * Handles CTEs, subqueries, and JOINs. 
847 */ 848 private TTable traceToPhysicalTable(TTable table) { 849 if (table == null) { 850 return null; 851 } 852 853 // If it's already a physical table, return it 854 if (table.getTableType() == gudusoft.gsqlparser.ETableSource.objectname && !table.isCTEName()) { 855 return table; 856 } 857 858 // If it's a CTE reference, trace through the CTE 859 if (table.isCTEName() && table.getCTE() != null) { 860 // Use a simple approach - get the first physical table from the CTE 861 // This could be enhanced to trace specific columns through nested CTEs 862 gudusoft.gsqlparser.nodes.TCTE nestedCte = table.getCTE(); 863 if (nestedCte.getSubquery() != null && nestedCte.getSubquery().tables != null) { 864 for (int i = 0; i < nestedCte.getSubquery().tables.size(); i++) { 865 TTable nestedTable = nestedCte.getSubquery().tables.getTable(i); 866 TTable physical = traceToPhysicalTable(nestedTable); 867 if (physical != null) { 868 return physical; 869 } 870 } 871 } 872 } 873 874 // If it's a subquery, trace through it 875 if (table.getSubquery() != null) { 876 SubqueryNamespace nestedNs = new SubqueryNamespace( 877 table.getSubquery(), 878 table.getAliasName(), 879 null // nameMatcher not needed for simple tracing 880 ); 881 return nestedNs.getFinalTable(); 882 } 883 884 return null; 885 } 886 887 /** 888 * Trace a column alias through a subquery to find its source table. 889 * When a column is aliased (e.g., SELECT t.id AS col1 FROM my_table t), 890 * the alias changes the column name but the data still comes from the 891 * underlying table. This method traces through to find that table. 892 * 893 * <p>Used by {@link #getTracedFinalTable()} to provide alias-aware lineage 894 * tracing. 
Handles both qualified (t.id AS col1) and unqualified (id AS col1) 895 * column references, as well as SQL Server proprietary alias syntax.</p> 896 * 897 * @param subNs The SubqueryNamespace containing the aliased column 898 * @return The physical table the column traces to, or null if undetermined 899 */ 900 private TTable traceColumnAliasThroughSubquery(SubqueryNamespace subNs) { 901 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 902 return null; 903 } 904 905 TResultColumn rc = (TResultColumn) definitionNode; 906 TExpression expr = rc.getExpr(); 907 if (expr == null) { 908 return null; 909 } 910 911 TObjectName colRef = null; 912 913 if (expr.getExpressionType() == EExpressionType.simple_object_name_t) { 914 // Standard: SELECT col AS alias 915 colRef = expr.getObjectOperand(); 916 } else if (expr.getExpressionType() == EExpressionType.sqlserver_proprietary_column_alias_t) { 917 // SQL Server: SELECT alias = col 918 TExpression rightExpr = expr.getRightOperand(); 919 if (rightExpr != null && rightExpr.getExpressionType() == EExpressionType.simple_object_name_t) { 920 colRef = rightExpr.getObjectOperand(); 921 } 922 } 923 924 if (colRef == null) { 925 return null; 926 } 927 928 // Use the resolved sourceTable from the inner column reference 929 TTable sourceTable = colRef.getSourceTable(); 930 if (sourceTable != null) { 931 return traceToPhysicalTable(sourceTable); 932 } 933 934 // Fallback: if no sourceTable resolved, try to find by table qualifier (S2: vendor-aware) 935 String tableQualifier = colRef.getTableString(); 936 if (tableQualifier != null && !tableQualifier.isEmpty()) { 937 TSelectSqlStatement subquery = subNs.getSubquery(); 938 if (subquery != null && subquery.tables != null) { 939 for (int i = 0; i < subquery.tables.size(); i++) { 940 TTable table = subquery.tables.getTable(i); 941 if (table == null) continue; 942 943 String alias = table.getAliasName(); 944 if (alias != null && aliasMatches(alias, tableQualifier)) { 945 
return traceToPhysicalTable(table); 946 } 947 948 String tableName = table.getTableName() != null ? table.getTableName().toString() : null; 949 if (tableName != null && tableMatches(tableName, tableQualifier)) { 950 return traceToPhysicalTable(table); 951 } 952 } 953 } 954 } 955 956 return null; 957 } 958 959 /** 960 * Get all physical tables that this column might originate from. 961 * 962 * <p>For columns from UNION queries, this returns tables from ALL branches, 963 * not just the first one. This is essential for proper lineage tracking 964 * where a column like {@code actor_id} in a UNION query should be linked 965 * to {@code actor.actor_id}, {@code actor2.actor_id}, {@code actor3.actor_id}.</p> 966 * 967 * <p>For regular single-table sources, this returns a single-element list 968 * with the same table as {@link #getFinalTable()}.</p> 969 * 970 * @return List of all physical tables, or empty list if unable to determine 971 */ 972 public java.util.List<TTable> getAllFinalTables() { 973 // If this ColumnSource has explicit candidateTables set (e.g., from UNION inference), 974 // use those instead of delegating to namespace. This is critical for UNION queries 975 // where only branches with SELECT * should contribute candidate tables for inferred columns. 976 // An EMPTY list means "no matching tables" - return it as-is without delegating. 977 // A NULL means "not determined" - delegate to namespace. 
978 if (candidateTables != null) { 979 return candidateTables; 980 } 981 982 if (sourceNamespace == null) { 983 if (overrideTable != null) { 984 return java.util.Collections.singletonList(overrideTable); 985 } 986 return java.util.Collections.emptyList(); 987 } 988 989 // For calculated columns and aliases in SubqueryNamespace, don't trace 990 if (sourceNamespace instanceof SubqueryNamespace) { 991 if (isCalculatedColumn() || isColumnAlias()) { 992 return java.util.Collections.emptyList(); 993 } 994 } 995 996 // For CTENamespace calculated/alias/explicit columns, trace to CTE itself 997 if (sourceNamespace instanceof CTENamespace) { 998 if (isCalculatedColumn() || isColumnAlias() || isCTEExplicitColumn()) { 999 TTable cteTable = ((CTENamespace) sourceNamespace).getReferencingTable(); 1000 if (cteTable != null) { 1001 return java.util.Collections.singletonList(cteTable); 1002 } 1003 return java.util.Collections.emptyList(); 1004 } 1005 } 1006 1007 // Delegate to namespace - handles UNION queries via UnionNamespace.getAllFinalTables() 1008 return sourceNamespace.getAllFinalTables(); 1009 } 1010 1011 /** 1012 * Check if this column is a passthrough reference to an underlying alias. 1013 * 1014 * <p>A passthrough column is a simple column reference in a subquery that 1015 * references another column from its FROM clause. 
If that underlying column 1016 * is an alias, then this passthrough should not trace to the base table.</p> 1017 * 1018 * <p>Example: In {@code SELECT stat_typ FROM (SELECT stat_typ = col FROM t) AS b}, 1019 * the outer {@code stat_typ} is a passthrough to {@code b.stat_typ}, which is an alias.</p> 1020 * 1021 * @return true if this is a passthrough to an alias 1022 */ 1023 private boolean isPassthroughToAlias() { 1024 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 1025 return false; 1026 } 1027 1028 TResultColumn rc = (TResultColumn) definitionNode; 1029 TExpression expr = rc.getExpr(); 1030 if (expr == null) { 1031 return false; 1032 } 1033 1034 // Only check simple column references (passthroughs) 1035 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1036 return false; 1037 } 1038 1039 // If this column itself has an alias that differs, it's already handled by isColumnAlias() 1040 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1041 return false; 1042 } 1043 1044 // Get the column name being referenced 1045 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 1046 if (objName == null) { 1047 return false; 1048 } 1049 String columnName = objName.getColumnNameOnly(); 1050 if (columnName == null || columnName.isEmpty()) { 1051 return false; 1052 } 1053 1054 // Resolve this column in the subquery's FROM scope to find the underlying ColumnSource 1055 if (sourceNamespace instanceof SubqueryNamespace) { 1056 SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace; 1057 ColumnSource underlyingSource = subNs.resolveColumnInFromScope(columnName); 1058 if (underlyingSource != null) { 1059 // Check if the underlying column is an alias or calculated 1060 if (underlyingSource.isColumnAlias() || underlyingSource.isCalculatedColumn()) { 1061 return true; 1062 } 1063 // Recursively check if it's a passthrough to alias 1064 if (underlyingSource.isPassthroughToAlias()) { 
1065 return true; 1066 } 1067 } 1068 } 1069 1070 return false; 1071 } 1072 1073 /** 1074 * Check if this subquery column is a passthrough reference to a calculated column. 1075 * 1076 * <p>A subquery column is a passthrough to calculated if:</p> 1077 * <ol> 1078 * <li>The column definition is a simple column reference (e.g., kko_lfz_9)</li> 1079 * <li>The referenced column in the FROM scope is calculated (CASE, function, etc.)</li> 1080 * </ol> 1081 * 1082 * <p>Example:</p> 1083 * <pre> 1084 * SELECT kko_lfz_9 AS KKO_LFZ_9 1085 * FROM (SELECT CASE WHEN... END AS kko_lfz_9 FROM t) subq 1086 * </pre> 1087 * <p>Here, kko_lfz_9 in the outer query is a passthrough to a calculated column in subq.</p> 1088 * 1089 * <p>This differs from {@link #isPassthroughToAlias()} which skips columns with aliases. 1090 * Here we check even aliased passthroughs to see if they reference calculated columns.</p> 1091 * 1092 * @return true if this is a passthrough to a calculated column in a subquery 1093 */ 1094 private boolean isPassthroughToCalculatedInSubquery() { 1095 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 1096 return false; 1097 } 1098 1099 TResultColumn rc = (TResultColumn) definitionNode; 1100 TExpression expr = rc.getExpr(); 1101 if (expr == null) { 1102 return false; 1103 } 1104 1105 // Only check simple column references (passthroughs) 1106 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1107 return false; 1108 } 1109 1110 // Get the column name being referenced 1111 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 1112 if (objName == null) { 1113 return false; 1114 } 1115 String columnName = objName.getColumnNameOnly(); 1116 if (columnName == null || columnName.isEmpty()) { 1117 return false; 1118 } 1119 1120 // Resolve this column in the subquery's FROM scope to find the underlying ColumnSource 1121 if (sourceNamespace instanceof SubqueryNamespace) { 1122 SubqueryNamespace subNs = 
(SubqueryNamespace) sourceNamespace; 1123 ColumnSource underlyingSource = subNs.resolveColumnInFromScope(columnName); 1124 if (underlyingSource != null) { 1125 // Check if the underlying column is calculated 1126 if (underlyingSource.isCalculatedColumn()) { 1127 return true; 1128 } 1129 // Recursively check if it's a passthrough to calculated 1130 if (underlyingSource.isPassthroughToCalculatedInSubquery()) { 1131 return true; 1132 } 1133 } 1134 } 1135 1136 return false; 1137 } 1138 1139 /** 1140 * Check if this CTE column is a passthrough reference to a calculated column in a subquery or nested CTE. 1141 * 1142 * <p>A CTE column is a passthrough to calculated if:</p> 1143 * <ol> 1144 * <li>The column definition is a simple qualified column reference (e.g., subq.calc_col or cte.calc_col)</li> 1145 * <li>The qualifier refers to a subquery or CTE in the CTE's body</li> 1146 * <li>The referenced column in that subquery/CTE is calculated (CASE, function, etc.)</li> 1147 * </ol> 1148 * 1149 * <p>Example with subquery:</p> 1150 * <pre> 1151 * WITH DataCTE AS ( 1152 * SELECT ErrorCountsCTE.ErrorSeverityCategory -- passthrough 1153 * FROM (SELECT CASE...END AS ErrorSeverityCategory FROM t) ErrorCountsCTE 1154 * ) 1155 * </pre> 1156 * 1157 * <p>Example with nested CTE:</p> 1158 * <pre> 1159 * WITH attendance_summary AS ( 1160 * SELECT date_trunc('month', attendance_date) as month FROM attendance 1161 * ) 1162 * WITH outer_cte AS ( 1163 * SELECT a.month FROM attendance_summary a -- passthrough to calculated in nested CTE 1164 * ) 1165 * </pre> 1166 * 1167 * @return true if this is a passthrough to a calculated column in a CTE 1168 */ 1169 private boolean isPassthroughToCalculatedInCTE() { 1170 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 1171 return false; 1172 } 1173 1174 TResultColumn rc = (TResultColumn) definitionNode; 1175 TExpression expr = rc.getExpr(); 1176 if (expr == null) { 1177 return false; 1178 } 1179 1180 // Only check simple 
qualified column references (passthroughs like subq.column) 1181 if (expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1182 return false; 1183 } 1184 1185 // Get the column reference 1186 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 1187 if (objName == null) { 1188 return false; 1189 } 1190 1191 // Must have a table qualifier (e.g., "ErrorCountsCTE" in "ErrorCountsCTE.ErrorSeverityCategory") 1192 String tableQualifier = objName.getTableString(); 1193 if (tableQualifier == null || tableQualifier.isEmpty()) { 1194 return false; 1195 } 1196 1197 String columnName = objName.getColumnNameOnly(); 1198 if (columnName == null || columnName.isEmpty()) { 1199 return false; 1200 } 1201 1202 // Get the CTE's subquery to find the referenced subquery alias 1203 if (!(sourceNamespace instanceof CTENamespace)) { 1204 return false; 1205 } 1206 1207 CTENamespace cteNs = (CTENamespace) sourceNamespace; 1208 gudusoft.gsqlparser.nodes.TCTE cte = cteNs.getCTE(); 1209 if (cte == null || cte.getSubquery() == null) { 1210 return false; 1211 } 1212 1213 // Find the subquery/table with this alias in the CTE's body 1214 gudusoft.gsqlparser.stmt.TSelectSqlStatement cteBody = cte.getSubquery(); 1215 TTable referencedTable = findTableByAlias(cteBody, tableQualifier); 1216 if (referencedTable == null) { 1217 return false; 1218 } 1219 1220 // Case 1: Referenced table is a subquery 1221 if (referencedTable.getSubquery() != null) { 1222 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = referencedTable.getSubquery(); 1223 return isCalculatedColumnInSelect(subquery, columnName); 1224 } 1225 1226 // Case 2: Referenced table is a CTE reference 1227 if (referencedTable.isCTEName() && referencedTable.getCTE() != null) { 1228 gudusoft.gsqlparser.nodes.TCTE referencedCTE = referencedTable.getCTE(); 1229 if (referencedCTE.getSubquery() != null) { 1230 return isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName); 1231 } 1232 } 1233 1234 return 
false; 1235 } 1236 1237 /** 1238 * Find a table in a SELECT statement by its alias. 1239 */ 1240 private TTable findTableByAlias(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String alias) { 1241 if (select == null || select.tables == null || alias == null) { 1242 return null; 1243 } 1244 1245 for (int i = 0; i < select.tables.size(); i++) { 1246 TTable table = select.tables.getTable(i); 1247 if (table != null) { 1248 String tableAlias = table.getAliasName(); 1249 // S2: vendor-aware alias / table-name compare (dotTable) 1250 if (tableAlias != null && aliasMatches(tableAlias, alias)) { 1251 return table; 1252 } 1253 // Also check table name for non-aliased references 1254 if (tableAlias == null && table.getTableName() != null) { 1255 String tableName = table.getTableName().toString(); 1256 if (tableName != null && tableMatches(tableName, alias)) { 1257 return table; 1258 } 1259 } 1260 } 1261 } 1262 return null; 1263 } 1264 1265 /** 1266 * Check if a column in a SELECT statement is calculated (not a simple column reference). 
1267 */ 1268 private boolean isCalculatedColumnInSelect(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) { 1269 if (select == null || select.getResultColumnList() == null || columnName == null) { 1270 return false; 1271 } 1272 1273 for (int i = 0; i < select.getResultColumnList().size(); i++) { 1274 TResultColumn rc = select.getResultColumnList().getResultColumn(i); 1275 if (rc == null) continue; 1276 1277 // Get the column name for this result column 1278 String rcName = null; 1279 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1280 rcName = rc.getAliasClause().getAliasName().toString(); 1281 } else if (rc.getExpr() != null && 1282 rc.getExpr().getExpressionType() == EExpressionType.simple_object_name_t && 1283 rc.getExpr().getObjectOperand() != null) { 1284 rcName = rc.getExpr().getObjectOperand().getColumnNameOnly(); 1285 } 1286 1287 if (rcName != null && columnMatches(rcName, columnName)) { 1288 // S2: vendor-aware column-name compare. Found the column - check if it's calculated. 1289 TExpression expr = rc.getExpr(); 1290 if (expr != null && expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1291 // Non-simple expression = calculated 1292 return true; 1293 } 1294 } 1295 } 1296 return false; 1297 } 1298 1299 /** 1300 * Check if this column source represents a calculated expression. 
1301 * 1302 * <p>A column is calculated if its definition is a TResultColumn with 1303 * a non-simple expression (not a direct column reference or star).</p> 1304 * 1305 * <p>For inferred columns (via star expansion), we trace back to the 1306 * source CTE/subquery to check if the original column is calculated.</p> 1307 * 1308 * @return true if this is a calculated column 1309 */ 1310 public boolean isCalculatedColumn() { 1311 if (definitionNode == null) { 1312 // For inferred columns through star expansion, check if the underlying 1313 // column in the source CTE/subquery is calculated 1314 return isInferredFromCalculatedColumn(); 1315 } 1316 1317 if (!(definitionNode instanceof TResultColumn)) { 1318 return false; 1319 } 1320 1321 TResultColumn rc = (TResultColumn) definitionNode; 1322 TExpression expr = rc.getExpr(); 1323 if (expr == null) { 1324 return false; 1325 } 1326 1327 EExpressionType exprType = expr.getExpressionType(); 1328 1329 // Simple column reference - NOT calculated (passthrough) 1330 if (exprType == EExpressionType.simple_object_name_t) { 1331 return false; 1332 } 1333 1334 // Star column - NOT calculated (passthrough) 1335 String colText = rc.toString(); 1336 if (colText != null && colText.endsWith("*")) { 1337 return false; 1338 } 1339 1340 // SQL Server proprietary column alias (col = expr) 1341 if (exprType == EExpressionType.sqlserver_proprietary_column_alias_t) { 1342 if (expr.getRightOperand() != null && 1343 expr.getRightOperand().getExpressionType() == EExpressionType.simple_object_name_t) { 1344 return false; 1345 } 1346 } 1347 1348 // Any other expression type is calculated 1349 return true; 1350 } 1351 1352 /** 1353 * Check if this is an inferred column (via star expansion) that originates from 1354 * a calculated column in the source CTE/subquery. 1355 * 1356 * <p>When a column is resolved through star expansion (e.g., SELECT * FROM CTE), 1357 * the definitionNode is null. 
We need to trace back to the source namespace 1358 * to check if the original column is calculated.</p> 1359 * 1360 * @return true if this inferred column traces back to a calculated column 1361 */ 1362 private boolean isInferredFromCalculatedColumn() { 1363 // Only check for inferred columns (evidence contains "auto_inferred") 1364 if (evidence == null || !evidence.contains("auto_inferred")) { 1365 return false; 1366 } 1367 1368 // Need the source namespace and column name to trace 1369 if (sourceNamespace == null || exposedName == null) { 1370 return false; 1371 } 1372 1373 // For CTE namespace, check if the column is calculated in the CTE's SELECT list 1374 if (sourceNamespace instanceof CTENamespace) { 1375 CTENamespace cteNs = (CTENamespace) sourceNamespace; 1376 gudusoft.gsqlparser.nodes.TCTE cte = cteNs.getCTE(); 1377 if (cte != null && cte.getSubquery() != null) { 1378 // First check the CTE's direct SELECT list 1379 if (isCalculatedColumnInSelect(cte.getSubquery(), exposedName)) { 1380 return true; 1381 } 1382 1383 // If the CTE has a star column, trace through to referenced CTEs 1384 if (cteNs.hasStarColumn()) { 1385 return isCalculatedInCTEChain(cte.getSubquery(), exposedName); 1386 } 1387 } 1388 } 1389 1390 // For Subquery namespace, check if the column is calculated in the subquery's SELECT list 1391 if (sourceNamespace instanceof SubqueryNamespace) { 1392 SubqueryNamespace subNs = (SubqueryNamespace) sourceNamespace; 1393 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = subNs.getSubquery(); 1394 if (subquery != null) { 1395 // First check the subquery's direct SELECT list 1396 if (isCalculatedColumnInSelect(subquery, exposedName)) { 1397 return true; 1398 } 1399 1400 // If the subquery has a star column, trace through to source tables 1401 if (subNs.hasStarColumn()) { 1402 return isCalculatedInSubqueryChain(subquery, exposedName); 1403 } 1404 } 1405 } 1406 1407 return false; 1408 } 1409 1410 /** 1411 * Check if a column is calculated by tracing 
through CTE references or 1412 * inline subqueries in the FROM clause. This handles cases like 1413 * Stage4 -> Stage3 -> Stage2 where the column is calculated at some 1414 * intermediate level, regardless of whether each level is a named CTE or 1415 * an inline derived table. 1416 * 1417 * <p>Example covered by the subquery branch:</p> 1418 * <pre> 1419 * WITH tbl AS ( 1420 * SELECT t.* FROM (SELECT CASE...END AS bug1 FROM base) t 1421 * ) 1422 * SELECT bug1 FROM tbl 1423 * </pre> 1424 * <p>The CTE body's FROM clause is a subquery (not another CTE), but 1425 * bug1 is still calculated in the inner subquery and must not be traced 1426 * to {@code base}.</p> 1427 */ 1428 private boolean isCalculatedInCTEChain(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) { 1429 return isCalculatedInCTEChain(select, columnName, 1430 new HashSet<gudusoft.gsqlparser.stmt.TSelectSqlStatement>()); 1431 } 1432 1433 private boolean isCalculatedInCTEChain( 1434 gudusoft.gsqlparser.stmt.TSelectSqlStatement select, 1435 String columnName, 1436 java.util.Set<gudusoft.gsqlparser.stmt.TSelectSqlStatement> visited) { 1437 if (select == null || select.tables == null) { 1438 return false; 1439 } 1440 // Guard against recursive CTEs (anchor body references itself via the 1441 // CTE name) and self-cycling subquery DAGs. 1442 if (!visited.add(select)) { 1443 return false; 1444 } 1445 1446 // Walk every table in the FROM clause - both CTE references and inline 1447 // subqueries can shadow underlying physical columns with calculated 1448 // expressions. 
1449 for (int i = 0; i < select.tables.size(); i++) { 1450 TTable table = select.tables.getTable(i); 1451 if (table == null) continue; 1452 1453 if (table.isCTEName() && table.getCTE() != null) { 1454 gudusoft.gsqlparser.nodes.TCTE referencedCTE = table.getCTE(); 1455 if (referencedCTE.getSubquery() != null) { 1456 if (isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName)) { 1457 return true; 1458 } 1459 if (isCalculatedInCTEChain(referencedCTE.getSubquery(), columnName, visited)) { 1460 return true; 1461 } 1462 } 1463 } else if (table.getSubquery() != null) { 1464 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = table.getSubquery(); 1465 if (isCalculatedColumnInSelect(subquery, columnName)) { 1466 return true; 1467 } 1468 if (isCalculatedInCTEChain(subquery, columnName, visited)) { 1469 return true; 1470 } 1471 } 1472 } 1473 return false; 1474 } 1475 1476 /** 1477 * Check if a column is calculated by tracing through subquery references. 1478 */ 1479 private boolean isCalculatedInSubqueryChain(gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) { 1480 return isCalculatedInSubqueryChain(select, columnName, 1481 new HashSet<gudusoft.gsqlparser.stmt.TSelectSqlStatement>()); 1482 } 1483 1484 private boolean isCalculatedInSubqueryChain( 1485 gudusoft.gsqlparser.stmt.TSelectSqlStatement select, 1486 String columnName, 1487 java.util.Set<gudusoft.gsqlparser.stmt.TSelectSqlStatement> visited) { 1488 if (select == null || select.tables == null) { 1489 return false; 1490 } 1491 if (!visited.add(select)) { 1492 return false; 1493 } 1494 1495 // Look for subquery tables in the FROM clause 1496 for (int i = 0; i < select.tables.size(); i++) { 1497 TTable table = select.tables.getTable(i); 1498 if (table != null && table.getSubquery() != null) { 1499 gudusoft.gsqlparser.stmt.TSelectSqlStatement subquery = table.getSubquery(); 1500 if (isCalculatedColumnInSelect(subquery, columnName)) { 1501 return true; 1502 } 1503 if 
(isCalculatedInSubqueryChain(subquery, columnName, visited)) { 1504 return true; 1505 } 1506 } 1507 // Also check CTE references within subqueries 1508 if (table != null && table.isCTEName() && table.getCTE() != null) { 1509 gudusoft.gsqlparser.nodes.TCTE referencedCTE = table.getCTE(); 1510 if (referencedCTE.getSubquery() != null) { 1511 if (isCalculatedColumnInSelect(referencedCTE.getSubquery(), columnName)) { 1512 return true; 1513 } 1514 if (isCalculatedInCTEChain(referencedCTE.getSubquery(), columnName, visited)) { 1515 return true; 1516 } 1517 } 1518 } 1519 } 1520 return false; 1521 } 1522 1523 /** 1524 * Check if this inferred column traces through SELECT * across a CTE or 1525 * subquery chain to a renamed alias of a simple column. 1526 * 1527 * <p>Used by {@link #getFinalTable()} to avoid linking renamed alias names 1528 * to the underlying physical table. Unlike {@link #isColumnAlias()}, this 1529 * works for inferred columns whose {@code definitionNode} is null because 1530 * they were exposed through {@code SELECT *}.</p> 1531 * 1532 * <p>Example:</p> 1533 * <pre> 1534 * WITH tbl AS ( 1535 * SELECT t.* FROM (SELECT max_hxdate AS bug2 FROM base) t 1536 * ) 1537 * SELECT bug2 FROM tbl 1538 * </pre> 1539 * <p>{@code bug2} is exposed by the inner subquery as an alias of 1540 * {@code max_hxdate}. 
The base table has no column named {@code bug2}, 1541 * so tracing further must stop here.</p> 1542 */ 1543 private boolean isInferredAliasThroughChain() { 1544 if (evidence == null || !evidence.contains("auto_inferred")) { 1545 return false; 1546 } 1547 if (sourceNamespace == null || exposedName == null) { 1548 return false; 1549 } 1550 1551 gudusoft.gsqlparser.stmt.TSelectSqlStatement select = null; 1552 if (sourceNamespace instanceof CTENamespace) { 1553 CTENamespace cteNs = (CTENamespace) sourceNamespace; 1554 if (cteNs.getCTE() != null) { 1555 select = cteNs.getCTE().getSubquery(); 1556 } 1557 } else if (sourceNamespace instanceof SubqueryNamespace) { 1558 select = ((SubqueryNamespace) sourceNamespace).getSubquery(); 1559 } 1560 if (select == null) { 1561 return false; 1562 } 1563 1564 // Direct match in current SELECT list. 1565 if (isRenamedAliasInSelect(select, exposedName)) { 1566 return true; 1567 } 1568 // Walk through underlying CTE/subquery tables exposed via star. 1569 return isRenamedAliasInChain(select, exposedName, new HashSet<gudusoft.gsqlparser.stmt.TSelectSqlStatement>()); 1570 } 1571 1572 /** 1573 * Check if a SELECT list contains a result column where a simple column 1574 * reference is renamed via an alias whose name matches {@code columnName} 1575 * but differs from the underlying column's own name. 
1576 */ 1577 private boolean isRenamedAliasInSelect( 1578 gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String columnName) { 1579 if (select == null || select.getResultColumnList() == null || columnName == null) { 1580 return false; 1581 } 1582 for (int i = 0; i < select.getResultColumnList().size(); i++) { 1583 TResultColumn rc = select.getResultColumnList().getResultColumn(i); 1584 if (rc == null || rc.getAliasClause() == null 1585 || rc.getAliasClause().getAliasName() == null) { 1586 continue; 1587 } 1588 String aliasName = rc.getAliasClause().getAliasName().toString(); 1589 if (aliasName == null || !columnMatches(aliasName, columnName)) { 1590 continue; 1591 } 1592 TExpression expr = rc.getExpr(); 1593 if (expr == null 1594 || expr.getExpressionType() != EExpressionType.simple_object_name_t) { 1595 // Non-simple expressions are handled as "calculated" elsewhere. 1596 continue; 1597 } 1598 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 1599 if (objName == null) continue; 1600 String origName = objName.getColumnNameOnly(); 1601 if (origName != null && !origName.equalsIgnoreCase(aliasName)) { 1602 return true; 1603 } 1604 } 1605 return false; 1606 } 1607 1608 /** 1609 * Recursive walk through the CTE references and subquery tables in 1610 * {@code select}'s FROM clause that can expose {@code columnName} via 1611 * star expansion, looking for {@link #isRenamedAliasInSelect} matches. 
1612 * 1613 * <p>The walk is restricted to star-source tables to avoid false 1614 * positives in queries like {@code SELECT a.* FROM a JOIN (SELECT x AS id 1615 * FROM b_base) b}: when resolving an inferred {@code id} from {@code a.*} 1616 * we must not match the alias inside {@code b}, which never contributed 1617 * to {@code a.*}'s expansion.</p> 1618 */ 1619 private boolean isRenamedAliasInChain( 1620 gudusoft.gsqlparser.stmt.TSelectSqlStatement select, 1621 String columnName, 1622 java.util.Set<gudusoft.gsqlparser.stmt.TSelectSqlStatement> visited) { 1623 if (select == null || select.tables == null || columnName == null) { 1624 return false; 1625 } 1626 if (!visited.add(select)) { 1627 return false; 1628 } 1629 1630 java.util.List<TTable> sources = findStarExpansionSources(select); 1631 if (sources == null) { 1632 // Unqualified star is present - it can expose a column from any 1633 // FROM-clause table, so walk them all. 1634 sources = new java.util.ArrayList<>(); 1635 for (int i = 0; i < select.tables.size(); i++) { 1636 TTable t = select.tables.getTable(i); 1637 if (t != null) sources.add(t); 1638 } 1639 } 1640 1641 for (TTable table : sources) { 1642 gudusoft.gsqlparser.stmt.TSelectSqlStatement nested = null; 1643 if (table.isCTEName() && table.getCTE() != null) { 1644 nested = table.getCTE().getSubquery(); 1645 } else if (table.getSubquery() != null) { 1646 nested = table.getSubquery(); 1647 } 1648 if (nested == null) continue; 1649 1650 if (isRenamedAliasInSelect(nested, columnName)) { 1651 return true; 1652 } 1653 if (isRenamedAliasInChain(nested, columnName, visited)) { 1654 return true; 1655 } 1656 } 1657 return false; 1658 } 1659 1660 /** 1661 * Collect the FROM-clause tables that can expose any inferred column 1662 * through a star expansion in {@code select}'s SELECT list. 
1663 * 1664 * <p>Return value semantics:</p> 1665 * <ul> 1666 * <li>{@code null} - an unqualified {@code *} is present, so every 1667 * FROM-clause table is a potential source.</li> 1668 * <li>Non-null, possibly empty - only the resolved qualified-star 1669 * sources (e.g. the {@code t} in {@code t.*}).</li> 1670 * </ul> 1671 */ 1672 private java.util.List<TTable> findStarExpansionSources( 1673 gudusoft.gsqlparser.stmt.TSelectSqlStatement select) { 1674 if (select == null || select.tables == null) { 1675 return java.util.Collections.emptyList(); 1676 } 1677 gudusoft.gsqlparser.nodes.TResultColumnList rcs = select.getResultColumnList(); 1678 if (rcs == null) { 1679 return java.util.Collections.emptyList(); 1680 } 1681 java.util.LinkedHashSet<TTable> sources = new java.util.LinkedHashSet<>(); 1682 for (int i = 0; i < rcs.size(); i++) { 1683 TResultColumn rc = rcs.getResultColumn(i); 1684 if (rc == null) continue; 1685 String text = rc.toString(); 1686 if (text == null) continue; 1687 text = text.trim(); 1688 if (!text.endsWith("*")) continue; 1689 if (text.equals("*")) { 1690 // Unqualified star - any FROM-clause table is a potential source. 1691 return null; 1692 } 1693 int dot = text.lastIndexOf('.'); 1694 if (dot <= 0) continue; 1695 String prefix = text.substring(0, dot).trim(); 1696 TTable matched = findStarPrefixTable(select, prefix); 1697 if (matched != null) { 1698 sources.add(matched); 1699 } 1700 } 1701 return new java.util.ArrayList<>(sources); 1702 } 1703 1704 /** 1705 * Find a FROM-clause table whose alias (or table name, when unaliased) 1706 * matches the prefix of a qualified star like {@code prefix.*}. 
1707 */ 1708 private TTable findStarPrefixTable( 1709 gudusoft.gsqlparser.stmt.TSelectSqlStatement select, String prefix) { 1710 if (select == null || select.tables == null || prefix == null || prefix.isEmpty()) { 1711 return null; 1712 } 1713 for (int i = 0; i < select.tables.size(); i++) { 1714 TTable table = select.tables.getTable(i); 1715 if (table == null) continue; 1716 String alias = table.getAliasName(); 1717 if (alias != null && aliasMatches(alias, prefix)) { 1718 return table; 1719 } 1720 if (alias == null && table.getTableName() != null) { 1721 String name = table.getTableName().toString(); 1722 if (name != null && tableMatches(name, prefix)) { 1723 return table; 1724 } 1725 } 1726 } 1727 return null; 1728 } 1729 1730 /** 1731 * Check if this column source represents a column alias (renamed column). 1732 * 1733 * <p>A column is an alias if it's a simple column reference in a subquery 1734 * that has been given a different name via AS or NAMED. For example:</p> 1735 * <ul> 1736 * <li>{@code SELECT col AS alias FROM table} - alias is different from col</li> 1737 * <li>{@code SELECT col (NAMED alias) FROM table} - Teradata NAMED syntax</li> 1738 * <li>{@code SELECT alias = col FROM table} - SQL Server proprietary syntax</li> 1739 * </ul> 1740 * 1741 * <p>Column aliases are traced through in {@link #getFinalTable()} to find the 1742 * physical table the data originates from, since the alias only renames the column 1743 * but the data still comes from the physical table.</p> 1744 * 1745 * @return true if this is a column alias with a different name than the original 1746 */ 1747 public boolean isColumnAlias() { 1748 if (definitionNode == null) { 1749 return false; 1750 } 1751 1752 if (!(definitionNode instanceof TResultColumn)) { 1753 return false; 1754 } 1755 1756 TResultColumn rc = (TResultColumn) definitionNode; 1757 TExpression expr = rc.getExpr(); 1758 if (expr == null) { 1759 return false; 1760 } 1761 1762 EExpressionType exprType = 
expr.getExpressionType(); 1763 1764 // Handle SQL Server proprietary alias syntax: alias = column 1765 // Example: stat_typ = stellplatz_typ 1766 if (exprType == EExpressionType.sqlserver_proprietary_column_alias_t) { 1767 TExpression rightExpr = expr.getRightOperand(); 1768 TExpression leftExpr = expr.getLeftOperand(); 1769 // Only if right side is a simple column reference 1770 if (rightExpr != null && leftExpr != null && 1771 rightExpr.getExpressionType() == EExpressionType.simple_object_name_t) { 1772 gudusoft.gsqlparser.nodes.TObjectName rightObjName = rightExpr.getObjectOperand(); 1773 gudusoft.gsqlparser.nodes.TObjectName leftObjName = leftExpr.getObjectOperand(); 1774 if (rightObjName != null && leftObjName != null) { 1775 String origName = rightObjName.getColumnNameOnly(); 1776 String aliasName = leftObjName.getColumnNameOnly(); 1777 // If alias name differs from original column name, it's an alias 1778 if (origName != null && aliasName != null && 1779 !origName.equalsIgnoreCase(aliasName)) { 1780 return true; 1781 } 1782 } 1783 } 1784 return false; 1785 } 1786 1787 // Standard alias syntax: column AS alias 1788 // Only applies to simple column references 1789 if (exprType != EExpressionType.simple_object_name_t) { 1790 return false; 1791 } 1792 1793 // Check if there's an alias that differs from the column name 1794 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1795 String aliasName = rc.getAliasClause().getAliasName().toString(); 1796 if (aliasName != null && !aliasName.isEmpty()) { 1797 gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand(); 1798 if (objName != null) { 1799 String origName = objName.getColumnNameOnly(); 1800 // If alias name differs from original name, it's an alias 1801 if (origName != null && !origName.equalsIgnoreCase(aliasName)) { 1802 return true; 1803 } 1804 } 1805 } 1806 } 1807 1808 return false; 1809 } 1810 1811 /** 1812 * Check if this column is a CTE explicit column with a 
different name than the underlying column. 1813 * 1814 * <p>A CTE explicit column is one defined in the CTE's column list that maps to a 1815 * different column name in the CTE's SELECT list. For example:</p> 1816 * <pre> 1817 * WITH cte(c1, c2) AS (SELECT id, name FROM users) 1818 * SELECT c1 FROM cte -- c1 maps to 'id', names differ 1819 * </pre> 1820 * 1821 * <p>CTE explicit columns should NOT trace to base tables because the explicit 1822 * column name (c1) doesn't exist as an actual column in the base table (users).</p> 1823 * 1824 * @return true if this is a CTE explicit column with a different name 1825 */ 1826 public boolean isCTEExplicitColumn() { 1827 // Must be from a CTENamespace 1828 if (!(sourceNamespace instanceof CTENamespace)) { 1829 return false; 1830 } 1831 1832 // Check evidence for explicit column marker 1833 if (!"cte_explicit_column".equals(evidence)) { 1834 return false; 1835 } 1836 1837 // Get the underlying column name from the definition node 1838 if (definitionNode == null || !(definitionNode instanceof TResultColumn)) { 1839 return false; 1840 } 1841 1842 TResultColumn rc = (TResultColumn) definitionNode; 1843 TExpression expr = rc.getExpr(); 1844 if (expr == null) { 1845 return false; 1846 } 1847 1848 // Get the column name from the SELECT list item 1849 String underlyingName = null; 1850 1851 // Check for alias first 1852 if (rc.getAliasClause() != null && rc.getAliasClause().getAliasName() != null) { 1853 underlyingName = rc.getAliasClause().getAliasName().toString(); 1854 } 1855 // Then check for simple column reference 1856 else if (expr.getExpressionType() == EExpressionType.simple_object_name_t && 1857 expr.getObjectOperand() != null) { 1858 underlyingName = expr.getObjectOperand().getColumnNameOnly(); 1859 } 1860 1861 // If we can't determine the underlying name, assume it's different 1862 // (calculated expressions, etc. 
are definitely different from explicit column names) 1863 if (underlyingName == null) { 1864 return true; 1865 } 1866 1867 // If the exposed name differs from the underlying column name, it's an explicit column rename 1868 return !exposedName.equalsIgnoreCase(underlyingName); 1869 } 1870 1871 /** 1872 * Get the override table, if set. 1873 */ 1874 public TTable getOverrideTable() { 1875 return overrideTable; 1876 } 1877 1878 /** 1879 * Get the candidate tables for ambiguous columns. 1880 * 1881 * <p>When a column could come from multiple tables (e.g., SELECT * FROM t1, t2), 1882 * this returns all possible source tables. End users can iterate through this 1883 * list to understand all potential sources for the column.</p> 1884 * 1885 * @return List of candidate tables, or empty list if not ambiguous 1886 */ 1887 public List<TTable> getCandidateTables() { 1888 return candidateTables != null ? candidateTables : Collections.emptyList(); 1889 } 1890 1891 /** 1892 * Check if this column has multiple candidate tables (is ambiguous). 1893 * 1894 * @return true if there are multiple candidate tables 1895 */ 1896 public boolean isAmbiguous() { 1897 return candidateTables != null && candidateTables.size() > 1; 1898 } 1899 1900 /** 1901 * Get the field path for deep/record field access. 1902 * 1903 * <p>When a column reference includes field access beyond the base column, 1904 * this returns the field path. For example, in {@code customer.address.city}, 1905 * if base column is {@code customer}, this returns a FieldPath with 1906 * segments {@code ["address", "city"]}.</p> 1907 * 1908 * @return The field path, or null if no field access 1909 */ 1910 public FieldPath getFieldPath() { 1911 return fieldPath; 1912 } 1913 1914 /** 1915 * Check if this column source has a field path (deep/record field access). 
1916 * 1917 * @return true if a non-empty field path exists 1918 */ 1919 public boolean hasFieldPath() { 1920 return fieldPath != null && !fieldPath.isEmpty(); 1921 } 1922 1923 /** 1924 * Check if this is a struct field access (has evidence "struct_field_access"). 1925 * 1926 * <p>This is a convenience method for checking if this column source represents 1927 * a struct/record field dereference operation.</p> 1928 * 1929 * @return true if this is a struct field access 1930 */ 1931 public boolean isStructFieldAccess() { 1932 return "struct_field_access".equals(evidence); 1933 } 1934 1935 /** 1936 * Checks if this is a definite resolution (confidence = 1.0) 1937 */ 1938 public boolean isDefinite() { 1939 return confidence >= 1.0; 1940 } 1941 1942 /** 1943 * Checks if this is an inferred resolution (confidence < 1.0) 1944 */ 1945 public boolean isInferred() { 1946 return confidence < 1.0; 1947 } 1948 1949 @Override 1950 public String toString() { 1951 StringBuilder sb = new StringBuilder(); 1952 sb.append(exposedName); 1953 if (sourceNamespace != null) { 1954 sb.append(" from ").append(sourceNamespace.getDisplayName()); 1955 } 1956 if (confidence < 1.0) { 1957 sb.append(String.format(" (confidence: %.2f)", confidence)); 1958 } 1959 return sb.toString(); 1960 } 1961 1962 /** 1963 * Creates a copy with updated confidence and evidence. 1964 * Used when merging or updating inference results. 1965 * 1966 * @deprecated Use {@link #withEvidence(ResolutionEvidence)} instead 1967 */ 1968 public ColumnSource withConfidence(double newConfidence, String newEvidence) { 1969 return new ColumnSource( 1970 this.sourceNamespace, 1971 this.exposedName, 1972 this.definitionNode, 1973 newConfidence, 1974 newEvidence, 1975 this.overrideTable, 1976 this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null, 1977 null, // will create from legacy evidence 1978 this.fieldPath 1979 ); 1980 } 1981 1982 /** 1983 * Creates a copy with updated ResolutionEvidence. 
1984 * This is the preferred method for updating evidence in new code. 1985 * 1986 * @param newEvidence The new evidence detail 1987 * @return A new ColumnSource with updated evidence 1988 */ 1989 public ColumnSource withEvidence(ResolutionEvidence newEvidence) { 1990 return new ColumnSource( 1991 this.sourceNamespace, 1992 this.exposedName, 1993 this.definitionNode, 1994 newEvidence != null ? newEvidence.getWeight() : this.confidence, 1995 newEvidence != null ? newEvidence.toLegacyEvidence() : this.evidence, 1996 this.overrideTable, 1997 this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null, 1998 newEvidence, 1999 this.fieldPath 2000 ); 2001 } 2002 2003 /** 2004 * Creates a copy with candidate tables. 2005 * Used when a column could come from multiple tables. 2006 */ 2007 public ColumnSource withCandidateTables(List<TTable> candidates) { 2008 return new ColumnSource( 2009 this.sourceNamespace, 2010 this.exposedName, 2011 this.definitionNode, 2012 this.confidence, 2013 this.evidence, 2014 this.overrideTable, 2015 candidates != null ? new java.util.ArrayList<>(candidates) : null, 2016 this.evidenceDetail, 2017 this.fieldPath 2018 ); 2019 } 2020 2021 /** 2022 * Creates a copy with a field path for deep/record field access. 2023 * 2024 * <p>This method is used when resolving struct/record field access patterns 2025 * like {@code customer.address.city}. The base column is preserved as the 2026 * exposedName, and the field path captures the remaining segments.</p> 2027 * 2028 * @param newFieldPath The field path segments (beyond the base column) 2029 * @return A new ColumnSource with the field path set 2030 */ 2031 public ColumnSource withFieldPath(FieldPath newFieldPath) { 2032 return new ColumnSource( 2033 this.sourceNamespace, 2034 this.exposedName, 2035 this.definitionNode, 2036 this.confidence, 2037 this.evidence, 2038 this.overrideTable, 2039 this.candidateTables != null ? 
new java.util.ArrayList<>(this.candidateTables) : null, 2040 this.evidenceDetail, 2041 newFieldPath 2042 ); 2043 } 2044 2045 /** 2046 * Creates a copy with a field path from a list of segments. 2047 * 2048 * <p>Convenience method for creating a ColumnSource with a field path 2049 * from a list of string segments.</p> 2050 * 2051 * @param segments The field path segments 2052 * @return A new ColumnSource with the field path set 2053 */ 2054 public ColumnSource withFieldPath(List<String> segments) { 2055 return withFieldPath(FieldPath.of(segments)); 2056 } 2057 2058 /** 2059 * Creates a copy with field path and updated evidence. 2060 * 2061 * <p>This method is used when resolving struct field access, combining 2062 * both the field path and the struct_field_access evidence marker.</p> 2063 * 2064 * @param newFieldPath The field path segments 2065 * @param newEvidence The evidence string (e.g., "struct_field_access") 2066 * @return A new ColumnSource with field path and evidence updated 2067 */ 2068 public ColumnSource withFieldPath(FieldPath newFieldPath, String newEvidence) { 2069 return new ColumnSource( 2070 this.sourceNamespace, 2071 this.exposedName, 2072 this.definitionNode, 2073 this.confidence, 2074 newEvidence, 2075 this.overrideTable, 2076 this.candidateTables != null ? new java.util.ArrayList<>(this.candidateTables) : null, 2077 null, // will create from legacy evidence 2078 newFieldPath 2079 ); 2080 } 2081 2082 /** 2083 * Count the number of "real" tables in a table list, excluding implicit lateral derived tables. 2084 * 2085 * <p>Teradata supports implicit lateral derived tables, which are auto-added when a column 2086 * references an undeclared table in the WHERE clause. 
These should not be counted when 2087 * determining if a subquery has multiple tables for column resolution purposes.</p> 2088 * 2089 * @param tables The table list to count 2090 * @return The number of real (non-implicit) tables 2091 */ 2092 private static int countRealTables(gudusoft.gsqlparser.nodes.TTableList tables) { 2093 if (tables == null) { 2094 return 0; 2095 } 2096 int count = 0; 2097 for (int i = 0; i < tables.size(); i++) { 2098 TTable table = tables.getTable(i); 2099 if (table != null && table.getEffectType() != gudusoft.gsqlparser.ETableEffectType.tetImplicitLateralDerivedTable) { 2100 count++; 2101 } 2102 } 2103 return count; 2104 } 2105 2106 /** 2107 * Check if a SELECT statement has a qualified star column (e.g., ta.*, tb.*). 2108 * Qualified stars identify which table columns come from in multi-table subqueries. 2109 */ 2110 private static boolean hasQualifiedStar(gudusoft.gsqlparser.stmt.TSelectSqlStatement select) { 2111 if (select == null || select.getResultColumnList() == null) { 2112 return false; 2113 } 2114 gudusoft.gsqlparser.nodes.TResultColumnList resultCols = select.getResultColumnList(); 2115 for (int i = 0; i < resultCols.size(); i++) { 2116 TResultColumn rc = resultCols.getResultColumn(i); 2117 if (rc != null) { 2118 String colStr = rc.toString().trim(); 2119 // Qualified star has format "alias.*" or "table.*" 2120 if (colStr.endsWith("*") && colStr.contains(".")) { 2121 return true; 2122 } 2123 } 2124 } 2125 return false; 2126 } 2127 2128 /** 2129 * Check if a column exists in a table's DDL definition. 
2130 * 2131 * <p>This method checks the table's column definitions (from CREATE TABLE statements 2132 * parsed in the same script) to verify if the column name is defined.</p> 2133 * 2134 * @param table The table to check 2135 * @param columnName The column name to look for 2136 * @return true if the column exists in the table's DDL, false if not found or no DDL available 2137 */ 2138 public static boolean isColumnInTableDdl(TTable table, String columnName) { 2139 if (table == null || columnName == null || columnName.isEmpty()) { 2140 return false; 2141 } 2142 2143 // Check if the table has column definitions (from CREATE TABLE DDL) 2144 gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions(); 2145 if (columnDefs != null && columnDefs.size() > 0) { 2146 // S2: route compare through IdentifierService so quoted-sensitive 2147 // dialects (Oracle quoted, BigQuery tables) and BigQuery's 2148 // case-insensitive columns are handled per-vendor. 2149 EDbVendor vendor = table.dbvendor != null ? table.dbvendor : EDbVendor.dbvgeneric; 2150 for (int i = 0; i < columnDefs.size(); i++) { 2151 gudusoft.gsqlparser.nodes.TColumnDefinition colDef = columnDefs.getColumn(i); 2152 if (colDef != null && colDef.getColumnName() != null) { 2153 String defColName = colDef.getColumnName().toString(); 2154 if (defColName != null 2155 && IdentifierService.areEqualStatic(vendor, ESQLDataObjectType.dotColumn, defColName, columnName)) { 2156 return true; 2157 } 2158 } 2159 } 2160 // DDL exists but column not found 2161 return false; 2162 } 2163 2164 // No DDL available - return false (cannot verify) 2165 return false; 2166 } 2167 2168 /** 2169 * Check if a table has DDL metadata available (from CREATE TABLE in same script). 
2170 * 2171 * @param table The table to check 2172 * @return true if DDL metadata is available for this table 2173 */ 2174 public static boolean hasTableDdl(TTable table) { 2175 if (table == null) { 2176 return false; 2177 } 2178 gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions(); 2179 return columnDefs != null && columnDefs.size() > 0; 2180 } 2181 2182 /** 2183 * Check DDL verification status for a candidate table. 2184 * 2185 * <p>Returns a tri-state result:</p> 2186 * <ul> 2187 * <li>1 = Column exists in table's DDL</li> 2188 * <li>0 = Column NOT found in table's DDL (DDL available but column missing)</li> 2189 * <li>-1 = Cannot verify (no DDL available for this table)</li> 2190 * </ul> 2191 * 2192 * @param table The candidate table to check 2193 * @param columnName The column name to verify 2194 * @return DDL verification status: 1 (exists), 0 (not found), -1 (no DDL) 2195 */ 2196 public static int getDdlVerificationStatus(TTable table, String columnName) { 2197 if (table == null || columnName == null) { 2198 return -1; 2199 } 2200 2201 gudusoft.gsqlparser.nodes.TColumnDefinitionList columnDefs = table.getColumnDefinitions(); 2202 if (columnDefs == null || columnDefs.size() == 0) { 2203 return -1; // No DDL available 2204 } 2205 2206 // DDL available - check if column exists (S2: vendor-aware compare) 2207 EDbVendor vendor = table.dbvendor != null ? 
table.dbvendor : EDbVendor.dbvgeneric; 2208 for (int i = 0; i < columnDefs.size(); i++) { 2209 gudusoft.gsqlparser.nodes.TColumnDefinition colDef = columnDefs.getColumn(i); 2210 if (colDef != null && colDef.getColumnName() != null) { 2211 String defColName = colDef.getColumnName().toString(); 2212 if (defColName != null 2213 && IdentifierService.areEqualStatic(vendor, ESQLDataObjectType.dotColumn, defColName, columnName)) { 2214 return 1; // Column exists in DDL 2215 } 2216 } 2217 } 2218 2219 return 0; // DDL exists but column not found 2220 } 2221 2222 /** 2223 * Get DDL verification status for all candidate tables. 2224 * 2225 * <p>Returns a map from each candidate table to its DDL verification status:</p> 2226 * <ul> 2227 * <li>1 = Column exists in table's DDL</li> 2228 * <li>0 = Column NOT found in table's DDL</li> 2229 * <li>-1 = Cannot verify (no DDL available)</li> 2230 * </ul> 2231 * 2232 * @return Map of candidate tables to their DDL verification status, or empty map if no candidates 2233 */ 2234 public java.util.Map<TTable, Integer> getCandidateTableDdlStatus() { 2235 java.util.Map<TTable, Integer> result = new java.util.LinkedHashMap<>(); 2236 if (candidateTables == null || candidateTables.isEmpty() || exposedName == null) { 2237 return result; 2238 } 2239 2240 for (TTable candidate : candidateTables) { 2241 int status = getDdlVerificationStatus(candidate, exposedName); 2242 result.put(candidate, status); 2243 } 2244 return result; 2245 } 2246}