001package gudusoft.gsqlparser.resolver2; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.ETableSource; 005import gudusoft.gsqlparser.TSourceToken; 006import gudusoft.gsqlparser.nodes.TObjectName; 007import gudusoft.gsqlparser.nodes.TTable; 008import gudusoft.gsqlparser.resolver2.matcher.INameMatcher; 009import gudusoft.gsqlparser.resolver2.model.AmbiguousColumnSource; 010import gudusoft.gsqlparser.resolver2.model.ColumnSource; 011import gudusoft.gsqlparser.resolver2.model.FieldPath; 012import gudusoft.gsqlparser.resolver2.model.ResolutionContext; 013import gudusoft.gsqlparser.resolver2.model.ResolutionResult; 014import gudusoft.gsqlparser.resolver2.namespace.INamespace; 015import gudusoft.gsqlparser.resolver2.namespace.UnnestNamespace; 016import gudusoft.gsqlparser.resolver2.scope.IScope; 017import gudusoft.gsqlparser.resolver2.scope.ResolvedImpl; 018 019import java.util.ArrayList; 020import java.util.Arrays; 021import java.util.Collections; 022import java.util.Comparator; 023import java.util.List; 024 025/** 026 * Core component for resolving column references to their sources. 027 * 028 * Key responsibilities: 029 * 1. Resolve TObjectName (column references) using scope tree 030 * 2. Handle qualified and unqualified names 031 * 3. Detect and report ambiguities 032 * 4. Apply GUESS_COLUMN_STRATEGY for ambiguous columns 033 * 5. Update TObjectName with resolution results 034 * 6. Update ResolutionContext for global querying 035 */ 036public class NameResolver { 037 038 private final INameMatcher nameMatcher; 039 private final ResolutionContext context; 040 private final TSQLResolverConfig config; 041 042 /** 043 * Create a NameResolver with full configuration. 044 * 045 * @param config The resolver configuration (includes name matcher and strategy) 046 * @param context The resolution context for tracking results 047 */ 048 public NameResolver(TSQLResolverConfig config, ResolutionContext context) { 049 this.config = config; 050 this.nameMatcher = config.getNameMatcher(); 051 this.context = context; 052 } 053 054 /** 055 * Create a NameResolver with just name matcher (backward compatibility). 056 * Uses default configuration for GUESS_COLUMN_STRATEGY. 057 * 058 * @deprecated Use NameResolver(TSQLResolverConfig, ResolutionContext) instead 059 */ 060 @Deprecated 061 public NameResolver(INameMatcher nameMatcher, ResolutionContext context) { 062 this.nameMatcher = nameMatcher; 063 this.context = context; 064 this.config = null; // Will use TBaseType.GUESS_COLUMN_STRATEGY 065 } 066 067 /** 068 * Get the effective GUESS_COLUMN_STRATEGY. 069 * Returns config value if available, otherwise TBaseType.GUESS_COLUMN_STRATEGY. 070 */ 071 private int getGuessColumnStrategy() { 072 if (config != null) { 073 return config.getGuessColumnStrategy(); 074 } 075 return gudusoft.gsqlparser.TBaseType.GUESS_COLUMN_STRATEGY; 076 } 077 078 private static final boolean DEBUG_RESOLUTION = false; 079 080 /** 081 * Resolve a column reference (TObjectName) within a given scope. 082 * 083 * @param objName The column reference to resolve 084 * @param scope The scope where the reference appears 085 * @return Resolution result 086 */ 087 public ResolutionResult resolve(TObjectName objName, IScope scope) { 088 if (objName == null || scope == null) { 089 return ResolutionResult.notFound("<null>"); 090 } 091 092 // Extract name parts from TObjectName 093 List<String> nameParts = extractNameParts(objName); 094 if (nameParts.isEmpty()) { 095 return ResolutionResult.notFound("<empty>"); 096 } 097 098 if (DEBUG_RESOLUTION) { 099 System.out.println("[DEBUG-RESOLVE] Resolving: " + objName + 100 " nameParts=" + nameParts + " scopeType=" + scope.getScopeType()); 101 } 102 103 // Use scope to resolve the name 104 ResolvedImpl resolved = new ResolvedImpl(); 105 scope.resolve(nameParts, nameMatcher, false, resolved); 106 107 if (DEBUG_RESOLUTION) { 108 System.out.println("[DEBUG-RESOLVE] Resolved matches: " + resolved.getCount()); 109 if (resolved.getCount() > 1) { 110 for (ResolvedImpl.Match m : resolved.getMatches()) { 111 System.out.println("[DEBUG-RESOLVE] Match: " + m.namespace.getDisplayName() + 112 " type=" + m.namespace.getClass().getSimpleName() + 113 " id=" + System.identityHashCode(m.namespace) + 114 " remaining=" + m.remainingNames + 115 " scope=" + m.scope.getScopeType()); 116 } 117 } 118 } 119 120 // Process resolution results 121 ResolutionResult result = processResolvedMatches(objName, nameParts, resolved); 122 123 // Delta 3: Struct-field fallback for BigQuery/Snowflake 124 // If resolution failed and we have a 2-part qualified name like "customer.customer_id", 125 // try interpreting it as column.field (struct field access) instead of table.column 126 // Handles both 2-part (column.field) and multi-part (column.field.subfield) names 127 if (!result.isExactMatch() && nameParts.size() == 2 && isStructFieldVendor()) { 128 ResolutionResult structFieldResult = tryStructFieldFallback(objName, nameParts, scope); 129 if (structFieldResult != null && structFieldResult.isExactMatch()) { 130 result = structFieldResult; 131 if (DEBUG_RESOLUTION) { 132 System.out.println("[DEBUG-RESOLVE] Struct-field fallback succeeded for: " + objName); 133 } 134 } 135 } 136 137 if (DEBUG_RESOLUTION) { 138 System.out.println("[DEBUG-RESOLVE] Result: " + result.getStatus() + 139 (result.isExactMatch() && result.getColumnSource() != null ? 140 " source=" + result.getColumnSource().getExposedName() : "")); 141 } 142 143 // Update TObjectName and context 144 updateObjectNameWithResult(objName, result); 145 146 return result; 147 } 148 149 /** 150 * Check if the current vendor supports struct-field access syntax (column.field). 151 * Currently supported: BigQuery, Snowflake 152 */ 153 private boolean isStructFieldVendor() { 154 if (config == null) { 155 return false; 156 } 157 EDbVendor vendor = config.getVendor(); 158 return vendor == EDbVendor.dbvbigquery || vendor == EDbVendor.dbvsnowflake; 159 } 160 161 /** 162 * Delta 3: Try to resolve a qualified name as struct-field access (column.field). 163 * 164 * In BigQuery/Snowflake, "customer.customer_id" might be: 165 * 1. Table "customer" with column "customer_id" (standard interpretation) 166 * 2. Column "customer" (STRUCT type) with field "customer_id" (struct-field access) 167 * 168 * If the standard interpretation failed, try the struct-field interpretation: 169 * - Treat the first part as an unqualified column name 170 * - If found, return the base column as the source (the STRUCT column) 171 * - Preserve the field path (segments beyond the base column) for downstream use 172 * 173 * @param objName The original TObjectName 174 * @param nameParts The extracted name parts (e.g., ["customer", "customer_id"]) 175 * @param scope The scope to search in 176 * @return Resolution result if struct-field interpretation succeeds, null otherwise 177 */ 178 private ResolutionResult tryStructFieldFallback(TObjectName objName, List<String> nameParts, IScope scope) { 179 // The base column is the first part (e.g., "customer" in "customer.customer_id") 180 String baseColumnName = nameParts.get(0); 181 182 // The field path is everything after the base column 183 // For "customer.customer_id", fieldPath = ["customer_id"] 184 // For "customer.address.city", fieldPath = ["address", "city"] 185 List<String> fieldPathSegments = nameParts.size() > 1 186 ? nameParts.subList(1, nameParts.size()) 187 : Collections.emptyList(); 188 189 // Try to resolve the base column as an unqualified name 190 List<String> singlePartName = Collections.singletonList(baseColumnName); 191 ResolvedImpl resolved = new ResolvedImpl(); 192 scope.resolve(singlePartName, nameMatcher, false, resolved); 193 194 if (resolved.isEmpty()) { 195 return null; // Base column not found 196 } 197 198 // Found potential matches - check if any namespace has this column 199 for (ResolvedImpl.Match match : resolved.getMatches()) { 200 INamespace namespace = match.namespace; 201 202 // Try to resolve the base column name in this namespace 203 ColumnSource baseColumnSource = namespace.resolveColumn(baseColumnName); 204 if (baseColumnSource != null) { 205 // Found the base column - return it as the source with field path preserved 206 if (DEBUG_RESOLUTION) { 207 System.out.println("[DEBUG-RESOLVE] Struct-field: found base column '" + 208 baseColumnName + "' in " + namespace.getDisplayName() + 209 ", fieldPath=" + fieldPathSegments); 210 } 211 212 // Create a new ColumnSource with: 213 // 1. struct_field_access evidence marker (for backward compatibility) 214 // 2. fieldPath preserved (new in Improvement B) 215 FieldPath fieldPath = FieldPath.of(fieldPathSegments); 216 ColumnSource structFieldSource = baseColumnSource.withFieldPath(fieldPath, "struct_field_access"); 217 218 return ResolutionResult.exactMatch(structFieldSource); 219 } 220 } 221 222 return null; 223 } 224 225 /** 226 * Extract name parts from TObjectName. 227 * Examples: 228 * - "col" -> ["col"] 229 * - "t.col" -> ["t", "col"] 230 * - "schema.table.col" -> ["schema", "table", "col"] 231 * 232 * For BigQuery/Snowflake struct field access like "customer.customer_id": 233 * - If schema/table tokens are not set, but toString() contains dots, 234 * extract all parts from toString() to capture field paths. 235 */ 236 private List<String> extractNameParts(TObjectName objName) { 237 List<String> parts = new ArrayList<>(); 238 239 // Add schema if present 240 if (objName.getSchemaToken() != null) { 241 parts.add(objName.getSchemaString()); 242 } 243 244 // Add table/qualifier if present 245 if (objName.getTableToken() != null) { 246 parts.add(objName.getTableString()); 247 } 248 249 // Add column name 250 String columnName = objName.getColumnNameOnly(); 251 if (columnName != null) { 252 parts.add(columnName); 253 } 254 255 // Improvement B: Handle BigQuery/Snowflake struct field access 256 // If we only got a single part from standard extraction, 257 // but toString() contains more segments (dots), extract them 258 // This handles cases like "customer.customer_id" where: 259 // - getColumnNameOnly() returns "customer" 260 // - toString() returns "customer.customer_id" 261 if (isStructFieldVendor()) { 262 String fullName = objName.toString(); 263 if (fullName != null && fullName.contains(".")) { 264 // Check if fullName has more segments than what we extracted 265 String[] fullParts = splitNameParts(fullName); 266 if (fullParts.length > parts.size() || hasConsecutiveDuplicates(parts)) { 267 // Replace parts with full extracted segments 268 parts.clear(); 269 Collections.addAll(parts, fullParts); 270 } 271 } 272 } 273 274 return parts; 275 } 276 277 /** 278 * Check if a list has consecutive duplicate elements. 279 * This detects parser bugs where segments are duplicated. 280 * Example: ["customer", "customer", "address"] returns true. 281 */ 282 private boolean hasConsecutiveDuplicates(List<String> list) { 283 if (list == null || list.size() < 2) { 284 return false; 285 } 286 for (int i = 1; i < list.size(); i++) { 287 if (list.get(i) != null && list.get(i).equals(list.get(i - 1))) { 288 return true; 289 } 290 } 291 return false; 292 } 293 294 295 /** 296 * Split a dotted name into parts, handling quoted identifiers. 297 * Examples: 298 * - "a.b.c" -> ["a", "b", "c"] 299 * - "`a.b`.c" -> ["`a.b`", "c"] 300 * - "a.`b.c`" -> ["a", "`b.c`"] 301 * 302 * @param name The dotted name string 303 * @return Array of name parts 304 */ 305 private String[] splitNameParts(String name) { 306 if (name == null || name.isEmpty()) { 307 return new String[0]; 308 } 309 310 // Simple case: no quotes, just split on dots 311 if (!name.contains("`") && !name.contains("\"") && !name.contains("[")) { 312 return name.split("\\."); 313 } 314 315 // Complex case: handle quoted identifiers 316 List<String> parts = new ArrayList<>(); 317 StringBuilder current = new StringBuilder(); 318 char quoteChar = 0; 319 320 for (int i = 0; i < name.length(); i++) { 321 char c = name.charAt(i); 322 323 if (quoteChar != 0) { 324 // Inside a quoted identifier 325 current.append(c); 326 if (c == quoteChar) { 327 // Check for escaped quote (doubled) 328 if (i + 1 < name.length() && name.charAt(i + 1) == quoteChar) { 329 current.append(name.charAt(++i)); 330 } else { 331 // End of quoted identifier 332 quoteChar = 0; 333 } 334 } 335 } else if (c == '`' || c == '"' || c == '[') { 336 // Start of quoted identifier 337 quoteChar = (c == '[') ? ']' : c; 338 current.append(c); 339 } else if (c == '.') { 340 // Separator 341 if (current.length() > 0) { 342 parts.add(current.toString()); 343 current.setLength(0); 344 } 345 } else { 346 current.append(c); 347 } 348 } 349 350 // Add last part 351 if (current.length() > 0) { 352 parts.add(current.toString()); 353 } 354 355 return parts.toArray(new String[0]); 356 } 357 358 /** 359 * Process resolution matches and determine final result. 360 */ 361 private ResolutionResult processResolvedMatches(TObjectName objName, 362 List<String> nameParts, 363 ResolvedImpl resolved) { 364 String columnName = nameParts.get(nameParts.size() - 1); 365 366 if (resolved.isEmpty()) { 367 // No matches found 368 return ResolutionResult.notFound(columnName); 369 } 370 371 // Deduplicate matches by namespace identity 372 // The same namespace can be found through different scope paths (e.g., CTE scope and SELECT scope) 373 // but it's still the same source - not truly ambiguous 374 List<ResolvedImpl.Match> uniqueMatches = deduplicateMatchesByNamespace(resolved.getMatches()); 375 376 if (uniqueMatches.size() == 1) { 377 // Exactly one unique namespace - success! 378 ResolvedImpl.Match match = uniqueMatches.get(0); 379 INamespace namespace = match.namespace; 380 381 // For qualified names like "t.col", we need to resolve the column 382 // For unqualified names like "col", we need to find it in the namespace 383 ColumnSource columnSource; 384 385 if (!match.remainingNames.isEmpty()) { 386 // Still have parts to resolve (e.g., found table, need to find column) 387 // Phase B3: Support multi-segment paths (table.column.field...) 388 if (match.remainingNames.size() > 1 && isStructFieldVendor()) { 389 // Multiple segments: use resolveColumnPath for deep field access 390 // e.g., remainingNames = ["customer", "address", "city"] 391 // -> base column "customer", fieldPath ["address", "city"] 392 columnSource = namespace.resolveColumnPath(match.remainingNames); 393 394 if (columnSource == null) { 395 // Base column not found 396 String baseColName = match.remainingNames.get(0); 397 return ResolutionResult.notFound(baseColName, 398 "Column '" + baseColName + "' not found in " + namespace.getDisplayName()); 399 } 400 401 if (DEBUG_RESOLUTION) { 402 System.out.println("[DEBUG-RESOLVE] Deep path resolved: " + 403 match.remainingNames + " -> base=" + columnSource.getExposedName() + 404 ", fieldPath=" + (columnSource.hasFieldPath() ? columnSource.getFieldPath() : "none")); 405 } 406 } else { 407 // Single segment: regular column resolution 408 String remainingColName = match.remainingNames.get(match.remainingNames.size() - 1); 409 columnSource = namespace.resolveColumn(remainingColName); 410 411 if (columnSource == null) { 412 // Column not found in the namespace 413 return ResolutionResult.notFound(remainingColName, 414 "Column '" + remainingColName + "' not found in " + namespace.getDisplayName()); 415 } 416 } 417 } else { 418 // Name resolved to a table/namespace directly with no remaining parts 419 // This happens when the column name matches the table alias exactly. 420 // 421 // For UNNEST tables (e.g., "UNNEST(arr) AS x"), the alias "x" is ALSO 422 // the implicit column name. When user writes "x" as a column reference, 423 // the scope resolution matches the table alias "x", leaving remainingNames empty. 424 // We should still try to resolve "x" as a column in the namespace. 425 // 426 // Example: SELECT x FROM UNNEST([1,2,3]) AS x 427 // Here "x" in SELECT refers to the implicit column, not the table. 428 if (namespace instanceof UnnestNamespace) { 429 // For UNNEST, try to resolve the matched name as a column 430 columnSource = namespace.resolveColumn(columnName); 431 if (columnSource != null) { 432 if (DEBUG_RESOLUTION) { 433 System.out.println("[DEBUG-RESOLVE] UNNEST implicit column resolved: " + 434 columnName + " in " + namespace.getDisplayName()); 435 } 436 return ResolutionResult.exactMatch(columnSource); 437 } 438 } 439 440 // For other namespaces or if column not found, this is an error 441 return ResolutionResult.notFound(columnName, 442 "Name '" + columnName + "' resolves to a table, not a column"); 443 } 444 445 return ResolutionResult.exactMatch(columnSource); 446 } 447 448 // Multiple unique namespaces - truly ambiguous 449 List<ColumnSource> candidates = new ArrayList<>(); 450 451 for (ResolvedImpl.Match match : uniqueMatches) { 452 INamespace namespace = match.namespace; 453 454 // Resolve column in this namespace 455 // Phase B3: Support multi-segment paths 456 ColumnSource columnSource; 457 if (!match.remainingNames.isEmpty()) { 458 if (match.remainingNames.size() > 1 && isStructFieldVendor()) { 459 // Multi-segment: use resolveColumnPath 460 columnSource = namespace.resolveColumnPath(match.remainingNames); 461 } else { 462 // Single segment: use resolveColumn 463 String remainingColName = match.remainingNames.get(match.remainingNames.size() - 1); 464 columnSource = namespace.resolveColumn(remainingColName); 465 } 466 } else { 467 columnSource = namespace.resolveColumn(columnName); 468 } 469 470 if (columnSource != null) { 471 candidates.add(columnSource); 472 } 473 } 474 475 if (candidates.isEmpty()) { 476 // Resolved to tables, but none have the column 477 return ResolutionResult.notFound(columnName, 478 "Column '" + columnName + "' not found in any of " + resolved.getCount() + " tables"); 479 } 480 481 if (candidates.size() == 1) { 482 // Only one table actually has the column 483 return ResolutionResult.exactMatch(candidates.get(0)); 484 } 485 486 // Sort candidates by their table's position in the SQL text (FROM clause order) 487 // This ensures consistent ordering for both ambiguous results and GUESS_COLUMN_STRATEGY 488 sortCandidatesByTablePosition(candidates); 489 490 // Check if all candidates are "inferred" (from tables without DDL metadata) 491 // If so, return AMBIGUOUS regardless of GUESS_COLUMN_STRATEGY 492 // Only apply GUESS_COLUMN_STRATEGY when we have definite knowledge about the columns 493 // 494 // The determination uses configurable thresholds: 495 // - minDefiniteConfidence: minimum confidence to be considered "definite" (default 0.9) 496 // - allowGuessWhenAllInferred: if true, allow guessing even when all candidates are inferred 497 boolean hasDefiniteCandidate = false; 498 double highestConfidence = 0.0; 499 double minDefiniteConf = config != null ? config.getMinDefiniteConfidence() : 0.9; 500 501 for (ColumnSource candidate : candidates) { 502 // A candidate is "definite" if it has high confidence and is not just inferred from usage 503 String evidence = candidate.getEvidence(); 504 double confidence = candidate.getConfidence(); 505 506 // Track highest confidence among candidates 507 if (confidence > highestConfidence) { 508 highestConfidence = confidence; 509 } 510 511 // Use structured evidence if available, otherwise check legacy evidence 512 boolean isInferred; 513 if (candidate.getEvidenceDetail() != null) { 514 // Use the structured evidence's own determination 515 isInferred = !candidate.getEvidenceDetail().isHighConfidence() || 516 candidate.getEvidenceDetail().isInferred(); 517 } else { 518 // Fallback to legacy logic 519 // "inferred_from_usage" means table without DDL metadata - not definite 520 // Low confidence (< minDefiniteConf) means we're guessing - not definite 521 // Null evidence with high confidence is also considered inferred 522 isInferred = (evidence == null || evidence.equals("inferred_from_usage")) || 523 confidence < minDefiniteConf; 524 } 525 526 if (!isInferred) { 527 hasDefiniteCandidate = true; 528 break; 529 } 530 } 531 532 // Check if we should allow guessing when all candidates are inferred 533 boolean allowGuessInferred = config != null && config.isAllowGuessWhenAllInferred(); 534 535 if (!hasDefiniteCandidate && !allowGuessInferred) { 536 // All candidates are from tables without DDL metadata (all inferred) 537 // Don't guess - return as ambiguous so formatter can handle appropriately 538 if (DEBUG_RESOLUTION) { 539 System.out.println("[DEBUG-RESOLVE] AMBIGUOUS (all inferred): " + columnName + " with " + candidates.size() + " candidates"); 540 for (ColumnSource c : candidates) { 541 System.out.println("[DEBUG-RESOLVE] - " + (c.getSourceNamespace() != null ? c.getSourceNamespace().getDisplayName() : "null") + 542 " evidence=" + c.getEvidence() + " confidence=" + c.getConfidence()); 543 } 544 } 545 AmbiguousColumnSource ambiguous = new AmbiguousColumnSource(columnName, candidates); 546 return ResolutionResult.ambiguous(ambiguous); 547 } 548 549 // Additional check: even if we have definite candidates or allow inferred guessing, 550 // require at least one candidate to meet the minConfidenceToGuess threshold 551 double minConfToGuess = config != null ? config.getMinConfidenceToGuess() : 0.95; 552 if (highestConfidence < minConfToGuess && !allowGuessInferred) { 553 // No candidate has sufficient confidence for guessing 554 if (DEBUG_RESOLUTION) { 555 System.out.println("[DEBUG-RESOLVE] AMBIGUOUS (confidence too low): " + columnName + 556 " highest=" + highestConfidence + " required=" + minConfToGuess); 557 } 558 AmbiguousColumnSource ambiguous = new AmbiguousColumnSource(columnName, candidates); 559 return ResolutionResult.ambiguous(ambiguous); 560 } 561 562 // Multiple tables have the column - apply GUESS_COLUMN_STRATEGY 563 // Candidates are already sorted by table position (done earlier) 564 int strategy = getGuessColumnStrategy(); 565 566 if (strategy == TSQLResolverConfig.GUESS_COLUMN_STRATEGY_NEAREST) { 567 // Pick the first candidate (nearest table in FROM clause order) 568 return ResolutionResult.exactMatch(candidates.get(0)); 569 } else if (strategy == TSQLResolverConfig.GUESS_COLUMN_STRATEGY_FARTHEST) { 570 // Pick the last candidate (farthest table in FROM clause order) 571 return ResolutionResult.exactMatch(candidates.get(candidates.size() - 1)); 572 } 573 574 // GUESS_COLUMN_STRATEGY_NOT_PICKUP: leave as ambiguous 575 if (DEBUG_RESOLUTION) { 576 System.out.println("[DEBUG-RESOLVE] AMBIGUOUS (NOT_PICKUP strategy): " + columnName + " with " + candidates.size() + " candidates"); 577 } 578 AmbiguousColumnSource ambiguous = new AmbiguousColumnSource(columnName, candidates); 579 return ResolutionResult.ambiguous(ambiguous); 580 } 581 582 /** 583 * Deduplicate matches by namespace identity. 584 * The same namespace can be found through different scope paths (e.g., CTE scope and SELECT scope) 585 * but it's still the same source - not truly ambiguous. 586 * 587 * @param matches All matches from scope resolution 588 * @return List of unique matches by namespace identity 589 */ 590 private List<ResolvedImpl.Match> deduplicateMatchesByNamespace(List<ResolvedImpl.Match> matches) { 591 if (matches == null || matches.size() <= 1) { 592 return matches; 593 } 594 595 // Use identity-based set to track unique namespaces 596 java.util.IdentityHashMap<INamespace, ResolvedImpl.Match> uniqueByNamespace = 597 new java.util.IdentityHashMap<>(); 598 599 for (ResolvedImpl.Match match : matches) { 600 if (match.namespace != null && !uniqueByNamespace.containsKey(match.namespace)) { 601 uniqueByNamespace.put(match.namespace, match); 602 } 603 } 604 605 return new ArrayList<>(uniqueByNamespace.values()); 606 } 607 608 /** 609 * Sort candidates by their source table's position in the SQL text. 610 * This ensures that when GUESS_COLUMN_STRATEGY_NEAREST or FARTHEST is applied, 611 * the candidates are ordered according to their actual position in the FROM clause. 612 * 613 * Uses the table's start token (lineNo, columnNo) to determine position. 614 * 615 * @param candidates List of ColumnSource candidates to sort in place 616 */ 617 private void sortCandidatesByTablePosition(List<ColumnSource> candidates) { 618 if (candidates == null || candidates.size() <= 1) { 619 return; 620 } 621 622 candidates.sort(new Comparator<ColumnSource>() { 623 @Override 624 public int compare(ColumnSource c1, ColumnSource c2) { 625 TTable t1 = c1.getFinalTable(); 626 TTable t2 = c2.getFinalTable(); 627 628 // If either table is null, maintain relative order 629 if (t1 == null && t2 == null) return 0; 630 if (t1 == null) return 1; // null tables go to the end 631 if (t2 == null) return -1; 632 633 TSourceToken token1 = t1.getStartToken(); 634 TSourceToken token2 = t2.getStartToken(); 635 636 // If either token is null, maintain relative order 637 if (token1 == null && token2 == null) return 0; 638 if (token1 == null) return 1; 639 if (token2 == null) return -1; 640 641 // Compare by line number first 642 int lineCmp = Long.compare(token1.lineNo, token2.lineNo); 643 if (lineCmp != 0) { 644 return lineCmp; 645 } 646 647 // If same line, compare by column number 648 return Long.compare(token1.columnNo, token2.columnNo); 649 } 650 }); 651 } 652 653 /** 654 * Update TObjectName with the resolution result. 655 * Also registers with ResolutionContext. 656 * 657 * For ambiguous columns (multiple candidate tables), this also populates 658 * TObjectName.candidateTables with all possible source tables. 659 * 660 * IMPORTANT: When resolution fails (notFound) and the column's sourceTable 661 * was set during Phase 1 to an UNNEST table, we clear the sourceTable. 662 * This is because UNNEST tables have a fixed set of columns (implicit column, 663 * offset, struct fields) and should NOT have arbitrary columns inferred. 664 * Clearing sourceTable allows the formatter to treat these as "missed" columns. 665 */ 666 private void updateObjectNameWithResult(TObjectName objName, ResolutionResult result) { 667 // Update TObjectName with resolution result 668 objName.setResolution(result); 669 670 // For notFound results, check if Phase 1 incorrectly linked to UNNEST table 671 // UNNEST tables have a fixed column set - don't allow inferred columns 672 if (!result.isExactMatch() && !result.isAmbiguous()) { 673 TTable currentSourceTable = objName.getSourceTable(); 674 if (currentSourceTable != null && 675 currentSourceTable.getTableType() == ETableSource.unnest) { 676 // Clear the incorrectly set sourceTable from Phase 1 677 // This column wasn't found in the UNNEST namespace, so it shouldn't 678 // be attributed to the UNNEST table 679 objName.setSourceTable(null); 680 if (DEBUG_RESOLUTION) { 681 System.out.println("[DEBUG-RESOLVE] Cleared incorrect UNNEST sourceTable for: " + 682 objName + " (not found in UNNEST namespace)"); 683 } 684 } 685 } 686 687 // For ambiguous results, populate candidateTables with all candidate tables 688 // IMPORTANT: Clear existing candidateTables first, as Phase 1 (linkColumnToTable) 689 // may have added candidates from incorrect scopes (e.g., MERGE target table for 690 // columns inside USING subquery). Phase 2 (NameResolver) has proper scope awareness 691 // and produces the authoritative candidate list. 692 if (result.isAmbiguous() && result.getAmbiguousSource() != null) { 693 AmbiguousColumnSource ambiguous = result.getAmbiguousSource(); 694 // Clear Phase 1 candidates before adding Phase 2's scope-aware candidates 695 objName.getCandidateTables().clear(); 696 for (ColumnSource candidate : ambiguous.getCandidates()) { 697 gudusoft.gsqlparser.nodes.TTable candidateTable = candidate.getFinalTable(); 698 if (candidateTable != null) { 699 objName.getCandidateTables().addTable(candidateTable); 700 } 701 } 702 } 703 704 // Register with context for global querying 705 context.registerResolution(objName, result); 706 } 707 708 /** 709 * Resolve a column within a specific namespace (for direct lookups). 710 */ 711 public ResolutionResult resolveInNamespace(String columnName, INamespace namespace) { 712 if (columnName == null || namespace == null) { 713 return ResolutionResult.notFound("<null>"); 714 } 715 716 ColumnSource source = namespace.resolveColumn(columnName); 717 if (source != null) { 718 return ResolutionResult.exactMatch(source); 719 } 720 721 return ResolutionResult.notFound(columnName); 722 } 723 724 /** 725 * Find all namespaces that contain a given column. 726 * Used for implementing full candidate collection in ambiguous scenarios. 727 */ 728 public List<INamespace> findNamespacesWithColumn(String columnName, IScope scope) { 729 List<INamespace> result = new ArrayList<>(); 730 731 for (INamespace ns : scope.getVisibleNamespaces()) { 732 if (ns.hasColumn(columnName) == ColumnLevel.EXISTS) { 733 result.add(ns); 734 } 735 } 736 737 return result; 738 } 739 740 public INameMatcher getNameMatcher() { 741 return nameMatcher; 742 } 743 744 public ResolutionContext getContext() { 745 return context; 746 } 747}