package gudusoft.gsqlparser.resolver2;

import gudusoft.gsqlparser.EDbVendor;
import gudusoft.gsqlparser.ETableSource;
import gudusoft.gsqlparser.TBaseType;
import gudusoft.gsqlparser.TSourceToken;
import gudusoft.gsqlparser.nodes.TObjectName;
import gudusoft.gsqlparser.nodes.TTable;
import gudusoft.gsqlparser.resolver2.matcher.INameMatcher;
import gudusoft.gsqlparser.resolver2.model.AmbiguousColumnSource;
import gudusoft.gsqlparser.resolver2.model.ColumnSource;
import gudusoft.gsqlparser.resolver2.model.FieldPath;
import gudusoft.gsqlparser.resolver2.model.ResolutionContext;
import gudusoft.gsqlparser.resolver2.model.ResolutionResult;
import gudusoft.gsqlparser.resolver2.model.StructFieldHint;
import gudusoft.gsqlparser.resolver2.namespace.INamespace;
import gudusoft.gsqlparser.resolver2.namespace.UnnestNamespace;
import gudusoft.gsqlparser.resolver2.scope.IScope;
import gudusoft.gsqlparser.resolver2.scope.ResolvedImpl;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Set;

/**
 * Core component for resolving column references to their sources.
 *
 * Key responsibilities:
 * 1. Resolve TObjectName (column references) using scope tree
 * 2. Handle qualified and unqualified names
 * 3. Detect and report ambiguities
 * 4. Apply GUESS_COLUMN_STRATEGY for ambiguous columns
 * 5. Update TObjectName with resolution results
 * 6. Update ResolutionContext for global querying
 */
public class NameResolver {

    /** Compile-time switch for the {@code [DEBUG-RESOLVE]} trace output on stdout. */
    private static final boolean DEBUG_RESOLUTION = false;

    private final INameMatcher nameMatcher;
    private final ResolutionContext context;

    /** Null when the deprecated constructor is used; every read of it is null-guarded. */
    private final TSQLResolverConfig config;

    /**
     * Create a NameResolver with full configuration.
     *
     * @param config  The resolver configuration (includes name matcher and strategy)
     * @param context The resolution context for tracking results
     */
    public NameResolver(TSQLResolverConfig config, ResolutionContext context) {
        this.config = config;
        this.nameMatcher = config.getNameMatcher();
        this.context = context;
    }

    /**
     * Create a NameResolver with just name matcher (backward compatibility).
     * Uses default configuration for GUESS_COLUMN_STRATEGY.
     *
     * Without a config the vendor is unknown, so the struct-field fallback and
     * struct-field hint logic are disabled for resolvers built this way.
     *
     * @param nameMatcher The matcher handed to the scope when resolving names
     * @param context     The resolution context for tracking results
     * @deprecated Use NameResolver(TSQLResolverConfig, ResolutionContext) instead
     */
    @Deprecated
    public NameResolver(INameMatcher nameMatcher, ResolutionContext context) {
        this.nameMatcher = nameMatcher;
        this.context = context;
        this.config = null; // Will use TBaseType.GUESS_COLUMN_STRATEGY
    }

    /**
     * Get the effective GUESS_COLUMN_STRATEGY.
     * Returns config value if available, otherwise TBaseType.GUESS_COLUMN_STRATEGY.
     */
    private int getGuessColumnStrategy() {
        if (config != null) {
            return config.getGuessColumnStrategy();
        }
        return TBaseType.GUESS_COLUMN_STRATEGY;
    }

    /**
     * Resolve a column reference (TObjectName) within a given scope.
     *
     * The result is also stored on {@code objName} and registered with the
     * resolution context, except for the early "&lt;null&gt;" / "&lt;empty&gt;" returns.
     *
     * @param objName The column reference to resolve
     * @param scope   The scope where the reference appears
     * @return Resolution result (never null)
     */
    public ResolutionResult resolve(TObjectName objName, IScope scope) {
        if (objName == null || scope == null) {
            return ResolutionResult.notFound("<null>");
        }

        // Extract name parts from TObjectName
        List<String> nameParts = extractNameParts(objName);
        if (nameParts.isEmpty()) {
            return ResolutionResult.notFound("<empty>");
        }

        if (DEBUG_RESOLUTION) {
            System.out.println("[DEBUG-RESOLVE] Resolving: " + objName +
                    " nameParts=" + nameParts + " scopeType=" + scope.getScopeType());
        }

        // Use scope to resolve the name
        ResolvedImpl resolved = new ResolvedImpl();
        scope.resolve(nameParts, nameMatcher, false, resolved);

        if (DEBUG_RESOLUTION) {
            System.out.println("[DEBUG-RESOLVE] Resolved matches: " + resolved.getCount());
            if (resolved.getCount() > 1) {
                for (ResolvedImpl.Match m : resolved.getMatches()) {
                    System.out.println("[DEBUG-RESOLVE] Match: " + m.namespace.getDisplayName() +
                            " type=" + m.namespace.getClass().getSimpleName() +
                            " id=" + System.identityHashCode(m.namespace) +
                            " remaining=" + m.remainingNames +
                            " scope=" + m.scope.getScopeType());
                }
            }
        }

        // Process resolution results
        ResolutionResult result = processResolvedMatches(objName, nameParts, resolved);

        // Delta 3: Struct-field fallback for BigQuery/Snowflake
        // If resolution failed and we have a 2-part qualified name like "customer.customer_id",
        // try interpreting it as column.field (struct field access) instead of table.column.
        // Only for 2-part names; 3+ part no-alias case (e.g., customer.address.city) is not
        // handled here to avoid changing resolution paths for existing 3-part BigQuery patterns
        // (e.g., purchases.first.msts) that are correctly handled by DataFlowAnalyzer heuristics.
        // The alias case (o.customer.address.city) IS handled by resolveColumnPath() in
        // processResolvedMatches() and does not depend on this fallback.
        if (!result.isExactMatch() && nameParts.size() == 2 && isStructFieldVendor()) {
            ResolutionResult structFieldResult = tryStructFieldFallback(objName, nameParts, scope);
            if (structFieldResult != null && structFieldResult.isExactMatch()) {
                result = structFieldResult;
                if (DEBUG_RESOLUTION) {
                    System.out.println("[DEBUG-RESOLVE] Struct-field fallback succeeded for: " + objName);
                }
            }
        }

        // Delta 4: Side-channel hint for 3+ part no-alias struct access (BigQuery only).
        // When resolution failed and we have 3+ parts (e.g., customer.address.city),
        // set a StructFieldHint WITHOUT changing the resolution result or sourceTable.
        // This provides struct path info to DataFlowAnalyzer without altering lineage topology.
        if (!result.isExactMatch() && nameParts.size() >= 3 && isStructFieldHintVendor()) {
            trySetStructFieldHint(objName, nameParts, scope);
        }

        if (DEBUG_RESOLUTION) {
            System.out.println("[DEBUG-RESOLVE] Result: " + result.getStatus() +
                    (result.isExactMatch() && result.getColumnSource() != null ?
                            " source=" + result.getColumnSource().getExposedName() : ""));
        }

        // Update TObjectName and context
        updateObjectNameWithResult(objName, result);

        return result;
    }

    /**
     * Check if the current vendor supports struct-field access syntax (column.field).
     * Currently supported: BigQuery, Snowflake. Always false when no config is available.
     */
    private boolean isStructFieldVendor() {
        if (config == null) {
            return false;
        }
        EDbVendor vendor = config.getVendor();
        return vendor == EDbVendor.dbvbigquery || vendor == EDbVendor.dbvsnowflake;
    }

    /**
     * Check if the current vendor supports struct-field hint annotations.
     * Currently BigQuery only (Snowflake uses schema.table.column for 3-part names).
     * Always false when no config is available.
     */
    private boolean isStructFieldHintVendor() {
        if (config == null) {
            return false;
        }
        return config.getVendor() == EDbVendor.dbvbigquery;
    }

    /**
     * Delta 4: Try to set a StructFieldHint for 3+ part no-alias struct access.
     *
     * For "customer.address.city" (3 parts, no alias):
     * - Treat the first part ("customer") as a potential base column
     * - If found in any visible namespace, set a hint with fieldPath=["address", "city"]
     * - Does NOT change ResolutionResult or sourceTable
     *
     * @param objName   The TObjectName to annotate with a hint
     * @param nameParts The extracted name parts (e.g., ["customer", "address", "city"])
     * @param scope     The scope to search in
     */
    private void trySetStructFieldHint(TObjectName objName, List<String> nameParts, IScope scope) {
        String baseColumnName = nameParts.get(0);
        List<String> fieldPathSegments = nameParts.subList(1, nameParts.size());

        // Try to find the base column as an unqualified name
        ResolvedImpl resolved = new ResolvedImpl();
        scope.resolve(Collections.singletonList(baseColumnName), nameMatcher, false, resolved);

        if (resolved.isEmpty()) {
            return; // Base column not found in any namespace
        }

        // Check if any namespace actually has this column; the first one that does wins
        for (ResolvedImpl.Match match : resolved.getMatches()) {
            ColumnSource baseColumnSource = match.namespace.resolveColumn(baseColumnName);
            if (baseColumnSource == null) {
                continue;
            }

            // Found the base column - create hint
            FieldPath fieldPath = FieldPath.of(fieldPathSegments);
            StructFieldHint hint = new StructFieldHint(
                    baseColumnName, fieldPath,
                    "struct_field_hint_no_alias", 0.7);
            objName.setStructFieldHint(hint);

            if (DEBUG_RESOLUTION) {
                System.out.println("[DEBUG-RESOLVE] StructFieldHint set for: " + objName +
                        " -> base=" + baseColumnName + ", fieldPath=" + fieldPathSegments);
            }
            return;
        }
    }

    /**
     * Delta 3: Try to resolve a qualified name as struct-field access (column.field).
     *
     * In BigQuery/Snowflake, "customer.customer_id" might be:
     * 1. Table "customer" with column "customer_id" (standard interpretation)
     * 2. Column "customer" (STRUCT type) with field "customer_id" (struct-field access)
     *
     * If the standard interpretation failed, try the struct-field interpretation:
     * - Treat the first part as an unqualified column name
     * - If found, return the base column as the source (the STRUCT column)
     * - Preserve the field path (segments beyond the base column) for downstream use
     *
     * @param objName   The original TObjectName
     * @param nameParts The extracted name parts (e.g., ["customer", "customer_id"])
     * @param scope     The scope to search in
     * @return Resolution result if struct-field interpretation succeeds, null otherwise
     */
    private ResolutionResult tryStructFieldFallback(TObjectName objName, List<String> nameParts, IScope scope) {
        // The base column is the first part (e.g., "customer" in "customer.customer_id")
        String baseColumnName = nameParts.get(0);

        // The field path is everything after the base column
        // For "customer.customer_id", fieldPath = ["customer_id"]
        // For "customer.address.city", fieldPath = ["address", "city"]
        List<String> fieldPathSegments = nameParts.size() > 1
                ? nameParts.subList(1, nameParts.size())
                : Collections.<String>emptyList();

        // Try to resolve the base column as an unqualified name
        ResolvedImpl resolved = new ResolvedImpl();
        scope.resolve(Collections.singletonList(baseColumnName), nameMatcher, false, resolved);

        if (resolved.isEmpty()) {
            return null; // Base column not found
        }

        // Found potential matches - check if any namespace has this column
        for (ResolvedImpl.Match match : resolved.getMatches()) {
            INamespace namespace = match.namespace;

            // Try to resolve the base column name in this namespace
            ColumnSource baseColumnSource = namespace.resolveColumn(baseColumnName);
            if (baseColumnSource == null) {
                continue;
            }

            // Found the base column - return it as the source with field path preserved
            if (DEBUG_RESOLUTION) {
                System.out.println("[DEBUG-RESOLVE] Struct-field: found base column '" +
                        baseColumnName + "' in " + namespace.getDisplayName() +
                        ", fieldPath=" + fieldPathSegments);
            }

            // Create a new ColumnSource with:
            // 1. struct_field_access evidence marker (for backward compatibility)
            // 2. fieldPath preserved (new in Improvement B)
            FieldPath fieldPath = FieldPath.of(fieldPathSegments);
            ColumnSource structFieldSource = baseColumnSource.withFieldPath(fieldPath, "struct_field_access");

            return ResolutionResult.exactMatch(structFieldSource);
        }

        return null;
    }

    /**
     * Extract name parts from TObjectName.
     * Examples:
     * - "col" -> ["col"]
     * - "t.col" -> ["t", "col"]
     * - "schema.table.col" -> ["schema", "table", "col"]
     *
     * For BigQuery/Snowflake struct field access like "customer.customer_id":
     * - If toString() contains dots and yields more segments than the token-based
     *   extraction (or the token-based parts contain consecutive duplicates),
     *   the segments of toString() are used instead to capture field paths.
     */
    private List<String> extractNameParts(TObjectName objName) {
        List<String> parts = new ArrayList<>();

        // Add schema if present, but NOT when databaseToken is also set.
        // When databaseToken is present, the name is fully qualified (db.schema.table.column)
        // and the schema position IS the schema, not a table alias. Including it would cause
        // the resolver to incorrectly match the schema against table aliases. (Mantis #4268)
        if (objName.getSchemaToken() != null && objName.getDatabaseToken() == null) {
            parts.add(objName.getSchemaString());
        }

        // Add table/qualifier if present
        if (objName.getTableToken() != null) {
            parts.add(objName.getTableString());
        }

        // Add column name
        String columnName = objName.getColumnNameOnly();
        if (columnName != null) {
            parts.add(columnName);
        }

        // Improvement B: Handle BigQuery/Snowflake struct field access
        // If toString() contains more dotted segments than the standard extraction
        // produced, or the extracted parts contain a consecutive duplicate, replace
        // the parts with the segments of toString().
        // This handles cases like "customer.customer_id" where:
        // - getColumnNameOnly() returns "customer"
        // - toString() returns "customer.customer_id"
        if (isStructFieldVendor()) {
            String fullName = objName.toString();
            if (fullName != null && fullName.contains(".")) {
                String[] fullParts = splitNameParts(fullName);
                if (fullParts.length > parts.size() || hasConsecutiveDuplicates(parts)) {
                    // Replace parts with full extracted segments
                    parts.clear();
                    Collections.addAll(parts, fullParts);
                }
            }
        }

        return parts;
    }

    /**
     * Check if a list has consecutive duplicate elements.
     * This detects parser bugs where segments are duplicated.
     * Example: ["customer", "customer", "address"] returns true.
     * Null elements never count as duplicates of their predecessor.
     */
    private boolean hasConsecutiveDuplicates(List<String> list) {
        if (list == null || list.size() < 2) {
            return false;
        }
        for (int i = 1; i < list.size(); i++) {
            if (list.get(i) != null && list.get(i).equals(list.get(i - 1))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Split a dotted name into parts, handling quoted identifiers.
     * Examples:
     * - "a.b.c" -> ["a", "b", "c"]
     * - "`a.b`.c" -> ["`a.b`", "c"]
     * - "a.`b.c`" -> ["a", "`b.c`"]
     *
     * Quote characters are kept in the returned parts. Backticks, double quotes
     * and square brackets are recognised; a doubled closing quote inside a quoted
     * identifier is treated as an escaped quote.
     *
     * @param name The dotted name string
     * @return Array of name parts (empty array for null or empty input)
     */
    private String[] splitNameParts(String name) {
        if (name == null || name.isEmpty()) {
            return new String[0];
        }

        // Simple case: no quotes, just split on dots
        if (!name.contains("`") && !name.contains("\"") && !name.contains("[")) {
            return name.split("\\.");
        }

        // Complex case: handle quoted identifiers
        List<String> parts = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        char quoteChar = 0; // 0 means "not inside a quoted identifier"

        for (int i = 0; i < name.length(); i++) {
            char c = name.charAt(i);

            if (quoteChar != 0) {
                // Inside a quoted identifier
                current.append(c);
                if (c == quoteChar) {
                    // Check for escaped quote (doubled)
                    if (i + 1 < name.length() && name.charAt(i + 1) == quoteChar) {
                        current.append(name.charAt(++i));
                    } else {
                        // End of quoted identifier
                        quoteChar = 0;
                    }
                }
            } else if (c == '`' || c == '"' || c == '[') {
                // Start of quoted identifier; '[' is closed by ']'
                quoteChar = (c == '[') ? ']' : c;
                current.append(c);
            } else if (c == '.') {
                // Separator; empty segments are skipped
                if (current.length() > 0) {
                    parts.add(current.toString());
                    current.setLength(0);
                }
            } else {
                current.append(c);
            }
        }

        // Add last part
        if (current.length() > 0) {
            parts.add(current.toString());
        }

        return parts.toArray(new String[0]);
    }

    /**
     * Process resolution matches and determine final result.
     *
     * Outcomes:
     * - no matches: notFound
     * - exactly one unique namespace: exactMatch, or notFound if the column is missing
     * - several namespaces: the ones that actually contain the column become candidates;
     *   one candidate is an exactMatch, several are either reported as ambiguous or
     *   decided by GUESS_COLUMN_STRATEGY, depending on evidence and confidence thresholds
     *
     * @param objName   The column reference being resolved
     * @param nameParts The extracted name parts (non-empty); the last one is the column name
     * @param resolved  The matches collected by the scope
     * @return Resolution result (never null)
     */
    private ResolutionResult processResolvedMatches(TObjectName objName,
                                                    List<String> nameParts,
                                                    ResolvedImpl resolved) {
        String columnName = nameParts.get(nameParts.size() - 1);

        if (resolved.isEmpty()) {
            // No matches found
            return ResolutionResult.notFound(columnName);
        }

        // Deduplicate matches by namespace identity
        // The same namespace can be found through different scope paths (e.g., CTE scope and SELECT scope)
        // but it's still the same source - not truly ambiguous
        List<ResolvedImpl.Match> uniqueMatches = deduplicateMatchesByNamespace(resolved.getMatches());

        if (uniqueMatches.size() == 1) {
            // Exactly one unique namespace - success!
            ResolvedImpl.Match match = uniqueMatches.get(0);
            INamespace namespace = match.namespace;

            // For qualified names like "t.col", we need to resolve the column
            // For unqualified names like "col", we need to find it in the namespace
            ColumnSource columnSource;

            if (!match.remainingNames.isEmpty()) {
                // Still have parts to resolve (e.g., found table, need to find column)
                // Phase B3: Support multi-segment paths (table.column.field...)
                if (match.remainingNames.size() > 1 && isStructFieldVendor()) {
                    // Multiple segments: use resolveColumnPath for deep field access
                    // e.g., remainingNames = ["customer", "address", "city"]
                    // -> base column "customer", fieldPath ["address", "city"]
                    columnSource = namespace.resolveColumnPath(match.remainingNames);

                    if (columnSource == null) {
                        // Base column not found
                        String baseColName = match.remainingNames.get(0);
                        return ResolutionResult.notFound(baseColName,
                                "Column '" + baseColName + "' not found in " + namespace.getDisplayName());
                    }

                    if (DEBUG_RESOLUTION) {
                        System.out.println("[DEBUG-RESOLVE] Deep path resolved: " +
                                match.remainingNames + " -> base=" + columnSource.getExposedName() +
                                ", fieldPath=" + (columnSource.hasFieldPath() ? columnSource.getFieldPath() : "none"));
                    }
                } else {
                    // Single segment: regular column resolution
                    String remainingColName = match.remainingNames.get(match.remainingNames.size() - 1);
                    columnSource = namespace.resolveColumn(remainingColName);

                    if (columnSource == null) {
                        // Column not found in the namespace
                        return ResolutionResult.notFound(remainingColName,
                                "Column '" + remainingColName + "' not found in " + namespace.getDisplayName());
                    }
                }
            } else {
                // Name resolved to a table/namespace directly with no remaining parts
                // This happens when the column name matches the table alias exactly.
                //
                // For UNNEST tables (e.g., "UNNEST(arr) AS x"), the alias "x" is ALSO
                // the implicit column name. When user writes "x" as a column reference,
                // the scope resolution matches the table alias "x", leaving remainingNames empty.
                // We should still try to resolve "x" as a column in the namespace.
                //
                // Example: SELECT x FROM UNNEST([1,2,3]) AS x
                // Here "x" in SELECT refers to the implicit column, not the table.
                if (namespace instanceof UnnestNamespace) {
                    // For UNNEST, try to resolve the matched name as a column
                    columnSource = namespace.resolveColumn(columnName);
                    if (columnSource != null) {
                        if (DEBUG_RESOLUTION) {
                            System.out.println("[DEBUG-RESOLVE] UNNEST implicit column resolved: " +
                                    columnName + " in " + namespace.getDisplayName());
                        }
                        return ResolutionResult.exactMatch(columnSource);
                    }
                }

                // For other namespaces or if column not found, this is an error
                return ResolutionResult.notFound(columnName,
                        "Name '" + columnName + "' resolves to a table, not a column");
            }

            return ResolutionResult.exactMatch(columnSource);
        }

        // Multiple unique namespaces - truly ambiguous
        List<ColumnSource> candidates = new ArrayList<>();

        for (ResolvedImpl.Match match : uniqueMatches) {
            INamespace namespace = match.namespace;

            // Resolve column in this namespace
            // Phase B3: Support multi-segment paths
            ColumnSource columnSource;
            if (!match.remainingNames.isEmpty()) {
                if (match.remainingNames.size() > 1 && isStructFieldVendor()) {
                    // Multi-segment: use resolveColumnPath
                    columnSource = namespace.resolveColumnPath(match.remainingNames);
                } else {
                    // Single segment: use resolveColumn
                    String remainingColName = match.remainingNames.get(match.remainingNames.size() - 1);
                    columnSource = namespace.resolveColumn(remainingColName);
                }
            } else {
                columnSource = namespace.resolveColumn(columnName);
            }

            if (columnSource != null) {
                candidates.add(columnSource);
            }
        }

        if (candidates.isEmpty()) {
            // Resolved to tables, but none have the column
            return ResolutionResult.notFound(columnName,
                    "Column '" + columnName + "' not found in any of " + resolved.getCount() + " tables");
        }

        if (candidates.size() == 1) {
            // Only one table actually has the column
            return ResolutionResult.exactMatch(candidates.get(0));
        }

        // Sort candidates by their table's position in the SQL text (FROM clause order)
        // This ensures consistent ordering for both ambiguous results and GUESS_COLUMN_STRATEGY
        sortCandidatesByTablePosition(candidates);

        // Check if all candidates are "inferred" (from tables without DDL metadata)
        // If so, return AMBIGUOUS regardless of GUESS_COLUMN_STRATEGY
        // Only apply GUESS_COLUMN_STRATEGY when we have definite knowledge about the columns
        //
        // The determination uses configurable thresholds:
        // - minDefiniteConfidence: minimum confidence to be considered "definite" (default 0.9)
        // - allowGuessWhenAllInferred: if true, allow guessing even when all candidates are inferred
        boolean hasDefiniteCandidate = false;
        double highestConfidence = 0.0;
        double minDefiniteConf = config != null ? config.getMinDefiniteConfidence() : 0.9;

        // Every candidate is inspected (no early exit): highestConfidence feeds the
        // minConfidenceToGuess check below and must reflect ALL candidates, not only
        // the ones seen before the first definite candidate.
        for (ColumnSource candidate : candidates) {
            double confidence = candidate.getConfidence();
            if (confidence > highestConfidence) {
                highestConfidence = confidence;
            }
            if (!isInferredCandidate(candidate, minDefiniteConf)) {
                hasDefiniteCandidate = true;
            }
        }

        // Check if we should allow guessing when all candidates are inferred
        boolean allowGuessInferred = config != null && config.isAllowGuessWhenAllInferred();

        if (!hasDefiniteCandidate && !allowGuessInferred) {
            // All candidates are from tables without DDL metadata (all inferred)
            // Don't guess - return as ambiguous so formatter can handle appropriately
            if (DEBUG_RESOLUTION) {
                System.out.println("[DEBUG-RESOLVE] AMBIGUOUS (all inferred): " + columnName + " with " + candidates.size() + " candidates");
                for (ColumnSource c : candidates) {
                    System.out.println("[DEBUG-RESOLVE] - " + (c.getSourceNamespace() != null ? c.getSourceNamespace().getDisplayName() : "null") +
                            " evidence=" + c.getEvidence() + " confidence=" + c.getConfidence());
                }
            }
            return ambiguousResult(columnName, candidates);
        }

        // Additional check: even if we have definite candidates or allow inferred guessing,
        // require at least one candidate to meet the minConfidenceToGuess threshold
        double minConfToGuess = config != null ? config.getMinConfidenceToGuess() : 0.95;
        if (highestConfidence < minConfToGuess && !allowGuessInferred) {
            // No candidate has sufficient confidence for guessing
            if (DEBUG_RESOLUTION) {
                System.out.println("[DEBUG-RESOLVE] AMBIGUOUS (confidence too low): " + columnName +
                        " highest=" + highestConfidence + " required=" + minConfToGuess);
            }
            return ambiguousResult(columnName, candidates);
        }

        // Multiple tables have the column - apply GUESS_COLUMN_STRATEGY
        // Candidates are already sorted by table position (done earlier)
        int strategy = getGuessColumnStrategy();

        if (strategy == TSQLResolverConfig.GUESS_COLUMN_STRATEGY_NEAREST) {
            // Pick the first candidate (nearest table in FROM clause order)
            return ResolutionResult.exactMatch(candidates.get(0));
        } else if (strategy == TSQLResolverConfig.GUESS_COLUMN_STRATEGY_FARTHEST) {
            // Pick the last candidate (farthest table in FROM clause order)
            return ResolutionResult.exactMatch(candidates.get(candidates.size() - 1));
        }

        // GUESS_COLUMN_STRATEGY_NOT_PICKUP: leave as ambiguous
        if (DEBUG_RESOLUTION) {
            System.out.println("[DEBUG-RESOLVE] AMBIGUOUS (NOT_PICKUP strategy): " + columnName + " with " + candidates.size() + " candidates");
        }
        return ambiguousResult(columnName, candidates);
    }

    /**
     * Decide whether a candidate counts as "inferred" (not definite knowledge).
     *
     * Uses the structured evidence detail when present; otherwise falls back to the
     * legacy rule: null evidence, "inferred_from_usage" evidence, or a confidence
     * below {@code minDefiniteConf} all count as inferred.
     *
     * @param candidate       The candidate column source
     * @param minDefiniteConf Minimum confidence for the legacy rule to treat a candidate as definite
     * @return true if the candidate is inferred, false if it is definite
     */
    private boolean isInferredCandidate(ColumnSource candidate, double minDefiniteConf) {
        if (candidate.getEvidenceDetail() != null) {
            // Use the structured evidence's own determination
            return !candidate.getEvidenceDetail().isHighConfidence()
                    || candidate.getEvidenceDetail().isInferred();
        }

        // Fallback to legacy logic
        // "inferred_from_usage" means table without DDL metadata - not definite
        // Low confidence (< minDefiniteConf) means we're guessing - not definite
        // Null evidence with high confidence is also considered inferred
        String evidence = candidate.getEvidence();
        return evidence == null
                || evidence.equals("inferred_from_usage")
                || candidate.getConfidence() < minDefiniteConf;
    }

    /** Wrap the candidates in an AmbiguousColumnSource and return it as an ambiguous result. */
    private ResolutionResult ambiguousResult(String columnName, List<ColumnSource> candidates) {
        AmbiguousColumnSource ambiguous = new AmbiguousColumnSource(columnName, candidates);
        return ResolutionResult.ambiguous(ambiguous);
    }

    /**
     * Deduplicate matches by namespace identity.
     * The same namespace can be found through different scope paths (e.g., CTE scope and SELECT scope)
     * but it's still the same source - not truly ambiguous.
     *
     * The first match seen for each namespace is kept and the encounter order of
     * {@code matches} is preserved, so the result is deterministic across runs.
     * When there is more than one match, matches with a null namespace are dropped.
     *
     * @param matches All matches from scope resolution
     * @return List of unique matches by namespace identity; the input itself when it
     *         is null or holds at most one match
     */
    private List<ResolvedImpl.Match> deduplicateMatchesByNamespace(List<ResolvedImpl.Match> matches) {
        if (matches == null || matches.size() <= 1) {
            return matches;
        }

        // Identity-based "seen" set; the result list (not the set) defines the order,
        // because IdentityHashMap iteration order is not stable between runs.
        Set<INamespace> seenNamespaces =
                Collections.newSetFromMap(new IdentityHashMap<INamespace, Boolean>());
        List<ResolvedImpl.Match> uniqueMatches = new ArrayList<>(matches.size());

        for (ResolvedImpl.Match match : matches) {
            if (match.namespace != null && seenNamespaces.add(match.namespace)) {
                uniqueMatches.add(match);
            }
        }

        return uniqueMatches;
    }

    /**
     * Sort candidates by their source table's position in the SQL text.
     * This ensures that when GUESS_COLUMN_STRATEGY_NEAREST or FARTHEST is applied,
     * the candidates are ordered according to their actual position in the FROM clause.
     *
     * Uses the table's start token (lineNo, columnNo) to determine position.
     * Candidates without a table or without a start token are placed at the end.
     *
     * @param candidates List of ColumnSource candidates to sort in place
     */
    private void sortCandidatesByTablePosition(List<ColumnSource> candidates) {
        if (candidates == null || candidates.size() <= 1) {
            return;
        }

        Comparator<ColumnSource> byTablePosition = NameResolver::compareByTablePosition;
        candidates.sort(byTablePosition);
    }

    /**
     * Order two candidates by the (lineNo, columnNo) of their final table's start token.
     * A candidate with a null table or null start token sorts after one that has both.
     */
    private static int compareByTablePosition(ColumnSource c1, ColumnSource c2) {
        TTable t1 = c1.getFinalTable();
        TTable t2 = c2.getFinalTable();

        // If both tables are null, maintain relative order; a single null goes to the end
        if (t1 == null && t2 == null) return 0;
        if (t1 == null) return 1;
        if (t2 == null) return -1;

        TSourceToken token1 = t1.getStartToken();
        TSourceToken token2 = t2.getStartToken();

        // Same rule for missing start tokens
        if (token1 == null && token2 == null) return 0;
        if (token1 == null) return 1;
        if (token2 == null) return -1;

        // Compare by line number first
        int lineCmp = Long.compare(token1.lineNo, token2.lineNo);
        if (lineCmp != 0) {
            return lineCmp;
        }

        // If same line, compare by column number
        return Long.compare(token1.columnNo, token2.columnNo);
    }

    /**
     * Update TObjectName with the resolution result.
     * Also registers with ResolutionContext.
     *
     * For ambiguous columns (multiple candidate tables), this also populates
     * TObjectName.candidateTables with all possible source tables.
     *
     * IMPORTANT: When resolution fails (notFound) and the column's sourceTable
     * was set during Phase 1 to an UNNEST table, we clear the sourceTable.
     * This is because UNNEST tables have a fixed set of columns (implicit column,
     * offset, struct fields) and should NOT have arbitrary columns inferred.
     * Clearing sourceTable allows the formatter to treat these as "missed" columns.
     */
    private void updateObjectNameWithResult(TObjectName objName, ResolutionResult result) {
        // Update TObjectName with resolution result
        objName.setResolution(result);

        // For notFound results, check if Phase 1 incorrectly linked to UNNEST table
        // UNNEST tables have a fixed column set - don't allow inferred columns
        if (!result.isExactMatch() && !result.isAmbiguous()) {
            TTable currentSourceTable = objName.getSourceTable();
            if (currentSourceTable != null &&
                    currentSourceTable.getTableType() == ETableSource.unnest) {
                // Clear the incorrectly set sourceTable from Phase 1
                // This column wasn't found in the UNNEST namespace, so it shouldn't
                // be attributed to the UNNEST table
                objName.setSourceTable(null);
                if (DEBUG_RESOLUTION) {
                    System.out.println("[DEBUG-RESOLVE] Cleared incorrect UNNEST sourceTable for: " +
                            objName + " (not found in UNNEST namespace)");
                }
            }
        }

        // For ambiguous results, populate candidateTables with all candidate tables
        // IMPORTANT: Clear existing candidateTables first, as Phase 1 (linkColumnToTable)
        // may have added candidates from incorrect scopes (e.g., MERGE target table for
        // columns inside USING subquery). Phase 2 (NameResolver) has proper scope awareness
        // and produces the authoritative candidate list.
        if (result.isAmbiguous() && result.getAmbiguousSource() != null) {
            AmbiguousColumnSource ambiguous = result.getAmbiguousSource();
            // Clear Phase 1 candidates before adding Phase 2's scope-aware candidates
            objName.getCandidateTables().clear();
            for (ColumnSource candidate : ambiguous.getCandidates()) {
                TTable candidateTable = candidate.getFinalTable();
                if (candidateTable != null) {
                    objName.getCandidateTables().addTable(candidateTable);
                }
            }
        }

        // Register with context for global querying
        context.registerResolution(objName, result);
    }

    /**
     * Resolve a column within a specific namespace (for direct lookups).
     *
     * @param columnName The column name to look up
     * @param namespace  The namespace to search
     * @return exactMatch if the namespace resolves the column, notFound otherwise
     *         (including when either argument is null)
     */
    public ResolutionResult resolveInNamespace(String columnName, INamespace namespace) {
        if (columnName == null || namespace == null) {
            return ResolutionResult.notFound("<null>");
        }

        ColumnSource source = namespace.resolveColumn(columnName);
        if (source != null) {
            return ResolutionResult.exactMatch(source);
        }

        return ResolutionResult.notFound(columnName);
    }

    /**
     * Find all namespaces that contain a given column.
     * Used for implementing full candidate collection in ambiguous scenarios.
     *
     * @param columnName The column name to look for
     * @param scope      The scope whose visible namespaces are searched
     * @return Namespaces whose hasColumn() reports ColumnLevel.EXISTS; empty when
     *         {@code scope} is null
     */
    public List<INamespace> findNamespacesWithColumn(String columnName, IScope scope) {
        List<INamespace> result = new ArrayList<>();
        if (scope == null) {
            return result;
        }

        for (INamespace ns : scope.getVisibleNamespaces()) {
            if (ns.hasColumn(columnName) == ColumnLevel.EXISTS) {
                result.add(ns);
            }
        }

        return result;
    }

    public INameMatcher getNameMatcher() {
        return nameMatcher;
    }

    public ResolutionContext getContext() {
        return context;
    }
}