Source code

001package gudusoft.gsqlparser.resolver2;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.ETableSource;
005import gudusoft.gsqlparser.TSourceToken;
006import gudusoft.gsqlparser.nodes.TObjectName;
007import gudusoft.gsqlparser.nodes.TTable;
008import gudusoft.gsqlparser.resolver2.matcher.INameMatcher;
009import gudusoft.gsqlparser.resolver2.model.AmbiguousColumnSource;
010import gudusoft.gsqlparser.resolver2.model.ColumnSource;
011import gudusoft.gsqlparser.resolver2.model.FieldPath;
012import gudusoft.gsqlparser.resolver2.model.ResolutionContext;
013import gudusoft.gsqlparser.resolver2.model.ResolutionResult;
014import gudusoft.gsqlparser.resolver2.namespace.INamespace;
015import gudusoft.gsqlparser.resolver2.namespace.UnnestNamespace;
016import gudusoft.gsqlparser.resolver2.scope.IScope;
017import gudusoft.gsqlparser.resolver2.scope.ResolvedImpl;
018
019import java.util.ArrayList;
020import java.util.Arrays;
021import java.util.Collections;
022import java.util.Comparator;
023import java.util.List;
024
025/**
026 * Core component for resolving column references to their sources.
027 *
028 * Key responsibilities:
029 * 1. Resolve TObjectName (column references) using scope tree
030 * 2. Handle qualified and unqualified names
031 * 3. Detect and report ambiguities
032 * 4. Apply GUESS_COLUMN_STRATEGY for ambiguous columns
033 * 5. Update TObjectName with resolution results
034 * 6. Update ResolutionContext for global querying
035 */
036public class NameResolver {
037
038    private final INameMatcher nameMatcher;
039    private final ResolutionContext context;
040    private final TSQLResolverConfig config;
041
042    /**
043     * Create a NameResolver with full configuration.
044     *
045     * @param config The resolver configuration (includes name matcher and strategy)
046     * @param context The resolution context for tracking results
047     */
048    public NameResolver(TSQLResolverConfig config, ResolutionContext context) {
049        this.config = config;
050        this.nameMatcher = config.getNameMatcher();
051        this.context = context;
052    }
053
054    /**
055     * Create a NameResolver with just name matcher (backward compatibility).
056     * Uses default configuration for GUESS_COLUMN_STRATEGY.
057     *
058     * @deprecated Use NameResolver(TSQLResolverConfig, ResolutionContext) instead
059     */
060    @Deprecated
061    public NameResolver(INameMatcher nameMatcher, ResolutionContext context) {
062        this.nameMatcher = nameMatcher;
063        this.context = context;
064        this.config = null; // Will use TBaseType.GUESS_COLUMN_STRATEGY
065    }
066
067    /**
068     * Get the effective GUESS_COLUMN_STRATEGY.
069     * Returns config value if available, otherwise TBaseType.GUESS_COLUMN_STRATEGY.
070     */
071    private int getGuessColumnStrategy() {
072        if (config != null) {
073            return config.getGuessColumnStrategy();
074        }
075        return gudusoft.gsqlparser.TBaseType.GUESS_COLUMN_STRATEGY;
076    }
077
078    private static final boolean DEBUG_RESOLUTION = false;
079
080    /**
081     * Resolve a column reference (TObjectName) within a given scope.
082     *
083     * @param objName The column reference to resolve
084     * @param scope The scope where the reference appears
085     * @return Resolution result
086     */
087    public ResolutionResult resolve(TObjectName objName, IScope scope) {
088        if (objName == null || scope == null) {
089            return ResolutionResult.notFound("<null>");
090        }
091
092        // Extract name parts from TObjectName
093        List<String> nameParts = extractNameParts(objName);
094        if (nameParts.isEmpty()) {
095            return ResolutionResult.notFound("<empty>");
096        }
097
098        if (DEBUG_RESOLUTION) {
099            System.out.println("[DEBUG-RESOLVE] Resolving: " + objName +
100                " nameParts=" + nameParts + " scopeType=" + scope.getScopeType());
101        }
102
103        // Use scope to resolve the name
104        ResolvedImpl resolved = new ResolvedImpl();
105        scope.resolve(nameParts, nameMatcher, false, resolved);
106
107        if (DEBUG_RESOLUTION) {
108            System.out.println("[DEBUG-RESOLVE]   Resolved matches: " + resolved.getCount());
109            if (resolved.getCount() > 1) {
110                for (ResolvedImpl.Match m : resolved.getMatches()) {
111                    System.out.println("[DEBUG-RESOLVE]     Match: " + m.namespace.getDisplayName() +
112                        " type=" + m.namespace.getClass().getSimpleName() +
113                        " id=" + System.identityHashCode(m.namespace) +
114                        " remaining=" + m.remainingNames +
115                        " scope=" + m.scope.getScopeType());
116                }
117            }
118        }
119
120        // Process resolution results
121        ResolutionResult result = processResolvedMatches(objName, nameParts, resolved);
122
123        // Delta 3: Struct-field fallback for BigQuery/Snowflake
124        // If resolution failed and we have a 2-part qualified name like "customer.customer_id",
125        // try interpreting it as column.field (struct field access) instead of table.column.
126        // Only for 2-part names; 3+ part no-alias case (e.g., customer.address.city) is not
127        // handled here to avoid changing resolution paths for existing 3-part BigQuery patterns
128        // (e.g., purchases.first.msts) that are correctly handled by DataFlowAnalyzer heuristics.
129        // The alias case (o.customer.address.city) IS handled by resolveColumnPath() in
130        // processResolvedMatches() and does not depend on this fallback.
131        if (!result.isExactMatch() && nameParts.size() == 2 && isStructFieldVendor()) {
132            ResolutionResult structFieldResult = tryStructFieldFallback(objName, nameParts, scope);
133            if (structFieldResult != null && structFieldResult.isExactMatch()) {
134                result = structFieldResult;
135                if (DEBUG_RESOLUTION) {
136                    System.out.println("[DEBUG-RESOLVE]   Struct-field fallback succeeded for: " + objName);
137                }
138            }
139        }
140
141        // Delta 4: Side-channel hint for 3+ part no-alias struct access (BigQuery only).
142        // When resolution failed and we have 3+ parts (e.g., customer.address.city),
143        // set a StructFieldHint WITHOUT changing the resolution result or sourceTable.
144        // This provides struct path info to DataFlowAnalyzer without altering lineage topology.
145        if (!result.isExactMatch() && nameParts.size() >= 3 && isStructFieldHintVendor()) {
146            trySetStructFieldHint(objName, nameParts, scope);
147        }
148
149        if (DEBUG_RESOLUTION) {
150            System.out.println("[DEBUG-RESOLVE]   Result: " + result.getStatus() +
151                (result.isExactMatch() && result.getColumnSource() != null ?
152                    " source=" + result.getColumnSource().getExposedName() : ""));
153        }
154
155        // Update TObjectName and context
156        updateObjectNameWithResult(objName, result);
157
158        return result;
159    }
160
161    /**
162     * Check if the current vendor supports struct-field access syntax (column.field).
163     * Currently supported: BigQuery, Snowflake
164     */
165    private boolean isStructFieldVendor() {
166        if (config == null) {
167            return false;
168        }
169        EDbVendor vendor = config.getVendor();
170        return vendor == EDbVendor.dbvbigquery || vendor == EDbVendor.dbvsnowflake;
171    }
172
173    /**
174     * Check if the current vendor supports struct-field hint annotations.
175     * Currently BigQuery only (Snowflake uses schema.table.column for 3-part names).
176     */
177    private boolean isStructFieldHintVendor() {
178        if (config == null) {
179            return false;
180        }
181        return config.getVendor() == EDbVendor.dbvbigquery;
182    }
183
184    /**
185     * Delta 4: Try to set a StructFieldHint for 3+ part no-alias struct access.
186     *
187     * For "customer.address.city" (3 parts, no alias):
188     * - Treat the first part ("customer") as a potential base column
189     * - If found in any visible namespace, set a hint with fieldPath=["address", "city"]
190     * - Does NOT change ResolutionResult or sourceTable
191     *
192     * @param objName The TObjectName to annotate with a hint
193     * @param nameParts The extracted name parts (e.g., ["customer", "address", "city"])
194     * @param scope The scope to search in
195     */
196    private void trySetStructFieldHint(TObjectName objName, List<String> nameParts, IScope scope) {
197        String baseColumnName = nameParts.get(0);
198        List<String> fieldPathSegments = nameParts.subList(1, nameParts.size());
199
200        // Try to find the base column as an unqualified name
201        List<String> singlePartName = Collections.singletonList(baseColumnName);
202        ResolvedImpl resolved = new ResolvedImpl();
203        scope.resolve(singlePartName, nameMatcher, false, resolved);
204
205        if (resolved.isEmpty()) {
206            return; // Base column not found in any namespace
207        }
208
209        // Check if any namespace actually has this column
210        for (ResolvedImpl.Match match : resolved.getMatches()) {
211            gudusoft.gsqlparser.resolver2.model.ColumnSource baseColumnSource =
212                match.namespace.resolveColumn(baseColumnName);
213            if (baseColumnSource != null) {
214                // Found the base column - create hint
215                gudusoft.gsqlparser.resolver2.model.FieldPath fieldPath =
216                    gudusoft.gsqlparser.resolver2.model.FieldPath.of(fieldPathSegments);
217                gudusoft.gsqlparser.resolver2.model.StructFieldHint hint =
218                    new gudusoft.gsqlparser.resolver2.model.StructFieldHint(
219                        baseColumnName, fieldPath,
220                        "struct_field_hint_no_alias", 0.7);
221                objName.setStructFieldHint(hint);
222
223                if (DEBUG_RESOLUTION) {
224                    System.out.println("[DEBUG-RESOLVE]   StructFieldHint set for: " + objName +
225                        " -> base=" + baseColumnName + ", fieldPath=" + fieldPathSegments);
226                }
227                return;
228            }
229        }
230    }
231
232    /**
233     * Delta 3: Try to resolve a qualified name as struct-field access (column.field).
234     *
235     * In BigQuery/Snowflake, "customer.customer_id" might be:
236     * 1. Table "customer" with column "customer_id" (standard interpretation)
237     * 2. Column "customer" (STRUCT type) with field "customer_id" (struct-field access)
238     *
239     * If the standard interpretation failed, try the struct-field interpretation:
240     * - Treat the first part as an unqualified column name
241     * - If found, return the base column as the source (the STRUCT column)
242     * - Preserve the field path (segments beyond the base column) for downstream use
243     *
244     * @param objName The original TObjectName
245     * @param nameParts The extracted name parts (e.g., ["customer", "customer_id"])
246     * @param scope The scope to search in
247     * @return Resolution result if struct-field interpretation succeeds, null otherwise
248     */
249    private ResolutionResult tryStructFieldFallback(TObjectName objName, List<String> nameParts, IScope scope) {
250        // The base column is the first part (e.g., "customer" in "customer.customer_id")
251        String baseColumnName = nameParts.get(0);
252
253        // The field path is everything after the base column
254        // For "customer.customer_id", fieldPath = ["customer_id"]
255        // For "customer.address.city", fieldPath = ["address", "city"]
256        List<String> fieldPathSegments = nameParts.size() > 1
257            ? nameParts.subList(1, nameParts.size())
258            : Collections.emptyList();
259
260        // Try to resolve the base column as an unqualified name
261        List<String> singlePartName = Collections.singletonList(baseColumnName);
262        ResolvedImpl resolved = new ResolvedImpl();
263        scope.resolve(singlePartName, nameMatcher, false, resolved);
264
265        if (resolved.isEmpty()) {
266            return null; // Base column not found
267        }
268
269        // Found potential matches - check if any namespace has this column
270        for (ResolvedImpl.Match match : resolved.getMatches()) {
271            INamespace namespace = match.namespace;
272
273            // Try to resolve the base column name in this namespace
274            ColumnSource baseColumnSource = namespace.resolveColumn(baseColumnName);
275            if (baseColumnSource != null) {
276                // Found the base column - return it as the source with field path preserved
277                if (DEBUG_RESOLUTION) {
278                    System.out.println("[DEBUG-RESOLVE]   Struct-field: found base column '" +
279                        baseColumnName + "' in " + namespace.getDisplayName() +
280                        ", fieldPath=" + fieldPathSegments);
281                }
282
283                // Create a new ColumnSource with:
284                // 1. struct_field_access evidence marker (for backward compatibility)
285                // 2. fieldPath preserved (new in Improvement B)
286                FieldPath fieldPath = FieldPath.of(fieldPathSegments);
287                ColumnSource structFieldSource = baseColumnSource.withFieldPath(fieldPath, "struct_field_access");
288
289                return ResolutionResult.exactMatch(structFieldSource);
290            }
291        }
292
293        return null;
294    }
295
296    /**
297     * Extract name parts from TObjectName.
298     * Examples:
299     * - "col" -> ["col"]
300     * - "t.col" -> ["t", "col"]
301     * - "schema.table.col" -> ["schema", "table", "col"]
302     *
303     * For BigQuery/Snowflake struct field access like "customer.customer_id":
304     * - If schema/table tokens are not set, but toString() contains dots,
305     *   extract all parts from toString() to capture field paths.
306     */
307    private List<String> extractNameParts(TObjectName objName) {
308        List<String> parts = new ArrayList<>();
309
310        // Add schema if present, but NOT when databaseToken is also set.
311        // When databaseToken is present, the name is fully qualified (db.schema.table.column)
312        // and the schema position IS the schema, not a table alias. Including it would cause
313        // the resolver to incorrectly match the schema against table aliases. (Mantis #4268)
314        if (objName.getSchemaToken() != null && objName.getDatabaseToken() == null) {
315            parts.add(objName.getSchemaString());
316        }
317
318        // Add table/qualifier if present
319        if (objName.getTableToken() != null) {
320            parts.add(objName.getTableString());
321        }
322
323        // Add column name
324        String columnName = objName.getColumnNameOnly();
325        if (columnName != null) {
326            parts.add(columnName);
327        }
328
329        // Improvement B: Handle BigQuery/Snowflake struct field access
330        // If we only got a single part from standard extraction,
331        // but toString() contains more segments (dots), extract them
332        // This handles cases like "customer.customer_id" where:
333        // - getColumnNameOnly() returns "customer"
334        // - toString() returns "customer.customer_id"
335        if (isStructFieldVendor()) {
336            String fullName = objName.toString();
337            if (fullName != null && fullName.contains(".")) {
338                // Check if fullName has more segments than what we extracted
339                String[] fullParts = splitNameParts(fullName);
340                if (fullParts.length > parts.size() || hasConsecutiveDuplicates(parts)) {
341                    // Replace parts with full extracted segments
342                    parts.clear();
343                    Collections.addAll(parts, fullParts);
344                }
345            }
346        }
347
348        return parts;
349    }
350
351    /**
352     * Check if a list has consecutive duplicate elements.
353     * This detects parser bugs where segments are duplicated.
354     * Example: ["customer", "customer", "address"] returns true.
355     */
356    private boolean hasConsecutiveDuplicates(List<String> list) {
357        if (list == null || list.size() < 2) {
358            return false;
359        }
360        for (int i = 1; i < list.size(); i++) {
361            if (list.get(i) != null && list.get(i).equals(list.get(i - 1))) {
362                return true;
363            }
364        }
365        return false;
366    }
367
368
369    /**
370     * Split a dotted name into parts, handling quoted identifiers.
371     * Examples:
372     * - "a.b.c" -> ["a", "b", "c"]
373     * - "`a.b`.c" -> ["`a.b`", "c"]
374     * - "a.`b.c`" -> ["a", "`b.c`"]
375     *
376     * @param name The dotted name string
377     * @return Array of name parts
378     */
379    private String[] splitNameParts(String name) {
380        if (name == null || name.isEmpty()) {
381            return new String[0];
382        }
383
384        // Simple case: no quotes, just split on dots
385        if (!name.contains("`") && !name.contains("\"") && !name.contains("[")) {
386            return name.split("\\.");
387        }
388
389        // Complex case: handle quoted identifiers
390        List<String> parts = new ArrayList<>();
391        StringBuilder current = new StringBuilder();
392        char quoteChar = 0;
393
394        for (int i = 0; i < name.length(); i++) {
395            char c = name.charAt(i);
396
397            if (quoteChar != 0) {
398                // Inside a quoted identifier
399                current.append(c);
400                if (c == quoteChar) {
401                    // Check for escaped quote (doubled)
402                    if (i + 1 < name.length() && name.charAt(i + 1) == quoteChar) {
403                        current.append(name.charAt(++i));
404                    } else {
405                        // End of quoted identifier
406                        quoteChar = 0;
407                    }
408                }
409            } else if (c == '`' || c == '"' || c == '[') {
410                // Start of quoted identifier
411                quoteChar = (c == '[') ? ']' : c;
412                current.append(c);
413            } else if (c == '.') {
414                // Separator
415                if (current.length() > 0) {
416                    parts.add(current.toString());
417                    current.setLength(0);
418                }
419            } else {
420                current.append(c);
421            }
422        }
423
424        // Add last part
425        if (current.length() > 0) {
426            parts.add(current.toString());
427        }
428
429        return parts.toArray(new String[0]);
430    }
431
432    /**
433     * Process resolution matches and determine final result.
434     */
435    private ResolutionResult processResolvedMatches(TObjectName objName,
436                                                    List<String> nameParts,
437                                                    ResolvedImpl resolved) {
438        String columnName = nameParts.get(nameParts.size() - 1);
439
440        if (resolved.isEmpty()) {
441            // No matches found
442            return ResolutionResult.notFound(columnName);
443        }
444
445        // Deduplicate matches by namespace identity
446        // The same namespace can be found through different scope paths (e.g., CTE scope and SELECT scope)
447        // but it's still the same source - not truly ambiguous
448        List<ResolvedImpl.Match> uniqueMatches = deduplicateMatchesByNamespace(resolved.getMatches());
449
450        if (uniqueMatches.size() == 1) {
451            // Exactly one unique namespace - success!
452            ResolvedImpl.Match match = uniqueMatches.get(0);
453            INamespace namespace = match.namespace;
454
455            // For qualified names like "t.col", we need to resolve the column
456            // For unqualified names like "col", we need to find it in the namespace
457            ColumnSource columnSource;
458
459            if (!match.remainingNames.isEmpty()) {
460                // Still have parts to resolve (e.g., found table, need to find column)
461                // Phase B3: Support multi-segment paths (table.column.field...)
462                if (match.remainingNames.size() > 1 && isStructFieldVendor()) {
463                    // Multiple segments: use resolveColumnPath for deep field access
464                    // e.g., remainingNames = ["customer", "address", "city"]
465                    // -> base column "customer", fieldPath ["address", "city"]
466                    columnSource = namespace.resolveColumnPath(match.remainingNames);
467
468                    if (columnSource == null) {
469                        // Base column not found
470                        String baseColName = match.remainingNames.get(0);
471                        return ResolutionResult.notFound(baseColName,
472                            "Column '" + baseColName + "' not found in " + namespace.getDisplayName());
473                    }
474
475                    if (DEBUG_RESOLUTION) {
476                        System.out.println("[DEBUG-RESOLVE]   Deep path resolved: " +
477                            match.remainingNames + " -> base=" + columnSource.getExposedName() +
478                            ", fieldPath=" + (columnSource.hasFieldPath() ? columnSource.getFieldPath() : "none"));
479                    }
480                } else {
481                    // Single segment: regular column resolution
482                    String remainingColName = match.remainingNames.get(match.remainingNames.size() - 1);
483                    columnSource = namespace.resolveColumn(remainingColName);
484
485                    if (columnSource == null) {
486                        // Column not found in the namespace
487                        return ResolutionResult.notFound(remainingColName,
488                            "Column '" + remainingColName + "' not found in " + namespace.getDisplayName());
489                    }
490                }
491            } else {
492                // Name resolved to a table/namespace directly with no remaining parts
493                // This happens when the column name matches the table alias exactly.
494                //
495                // For UNNEST tables (e.g., "UNNEST(arr) AS x"), the alias "x" is ALSO
496                // the implicit column name. When user writes "x" as a column reference,
497                // the scope resolution matches the table alias "x", leaving remainingNames empty.
498                // We should still try to resolve "x" as a column in the namespace.
499                //
500                // Example: SELECT x FROM UNNEST([1,2,3]) AS x
501                // Here "x" in SELECT refers to the implicit column, not the table.
502                if (namespace instanceof UnnestNamespace) {
503                    // For UNNEST, try to resolve the matched name as a column
504                    columnSource = namespace.resolveColumn(columnName);
505                    if (columnSource != null) {
506                        if (DEBUG_RESOLUTION) {
507                            System.out.println("[DEBUG-RESOLVE]   UNNEST implicit column resolved: " +
508                                columnName + " in " + namespace.getDisplayName());
509                        }
510                        return ResolutionResult.exactMatch(columnSource);
511                    }
512                }
513
514                // For other namespaces or if column not found, this is an error
515                return ResolutionResult.notFound(columnName,
516                    "Name '" + columnName + "' resolves to a table, not a column");
517            }
518
519            return ResolutionResult.exactMatch(columnSource);
520        }
521
522        // Multiple unique namespaces - truly ambiguous
523        List<ColumnSource> candidates = new ArrayList<>();
524
525        for (ResolvedImpl.Match match : uniqueMatches) {
526            INamespace namespace = match.namespace;
527
528            // Resolve column in this namespace
529            // Phase B3: Support multi-segment paths
530            ColumnSource columnSource;
531            if (!match.remainingNames.isEmpty()) {
532                if (match.remainingNames.size() > 1 && isStructFieldVendor()) {
533                    // Multi-segment: use resolveColumnPath
534                    columnSource = namespace.resolveColumnPath(match.remainingNames);
535                } else {
536                    // Single segment: use resolveColumn
537                    String remainingColName = match.remainingNames.get(match.remainingNames.size() - 1);
538                    columnSource = namespace.resolveColumn(remainingColName);
539                }
540            } else {
541                columnSource = namespace.resolveColumn(columnName);
542            }
543
544            if (columnSource != null) {
545                candidates.add(columnSource);
546            }
547        }
548
549        if (candidates.isEmpty()) {
550            // Resolved to tables, but none have the column
551            return ResolutionResult.notFound(columnName,
552                "Column '" + columnName + "' not found in any of " + resolved.getCount() + " tables");
553        }
554
555        if (candidates.size() == 1) {
556            // Only one table actually has the column
557            return ResolutionResult.exactMatch(candidates.get(0));
558        }
559
560        // Sort candidates by their table's position in the SQL text (FROM clause order)
561        // This ensures consistent ordering for both ambiguous results and GUESS_COLUMN_STRATEGY
562        sortCandidatesByTablePosition(candidates);
563
564        // Check if all candidates are "inferred" (from tables without DDL metadata)
565        // If so, return AMBIGUOUS regardless of GUESS_COLUMN_STRATEGY
566        // Only apply GUESS_COLUMN_STRATEGY when we have definite knowledge about the columns
567        //
568        // The determination uses configurable thresholds:
569        // - minDefiniteConfidence: minimum confidence to be considered "definite" (default 0.9)
570        // - allowGuessWhenAllInferred: if true, allow guessing even when all candidates are inferred
571        boolean hasDefiniteCandidate = false;
572        double highestConfidence = 0.0;
573        double minDefiniteConf = config != null ? config.getMinDefiniteConfidence() : 0.9;
574
575        for (ColumnSource candidate : candidates) {
576            // A candidate is "definite" if it has high confidence and is not just inferred from usage
577            String evidence = candidate.getEvidence();
578            double confidence = candidate.getConfidence();
579
580            // Track highest confidence among candidates
581            if (confidence > highestConfidence) {
582                highestConfidence = confidence;
583            }
584
585            // Use structured evidence if available, otherwise check legacy evidence
586            boolean isInferred;
587            if (candidate.getEvidenceDetail() != null) {
588                // Use the structured evidence's own determination
589                isInferred = !candidate.getEvidenceDetail().isHighConfidence() ||
590                             candidate.getEvidenceDetail().isInferred();
591            } else {
592                // Fallback to legacy logic
593                // "inferred_from_usage" means table without DDL metadata - not definite
594                // Low confidence (< minDefiniteConf) means we're guessing - not definite
595                // Null evidence with high confidence is also considered inferred
596                isInferred = (evidence == null || evidence.equals("inferred_from_usage")) ||
597                             confidence < minDefiniteConf;
598            }
599
600            if (!isInferred) {
601                hasDefiniteCandidate = true;
602                break;
603            }
604        }
605
606        // Check if we should allow guessing when all candidates are inferred
607        boolean allowGuessInferred = config != null && config.isAllowGuessWhenAllInferred();
608
609        if (!hasDefiniteCandidate && !allowGuessInferred) {
610            // All candidates are from tables without DDL metadata (all inferred)
611            // Don't guess - return as ambiguous so formatter can handle appropriately
612            if (DEBUG_RESOLUTION) {
613                System.out.println("[DEBUG-RESOLVE]   AMBIGUOUS (all inferred): " + columnName + " with " + candidates.size() + " candidates");
614                for (ColumnSource c : candidates) {
615                    System.out.println("[DEBUG-RESOLVE]     - " + (c.getSourceNamespace() != null ? c.getSourceNamespace().getDisplayName() : "null") +
616                        " evidence=" + c.getEvidence() + " confidence=" + c.getConfidence());
617                }
618            }
619            AmbiguousColumnSource ambiguous = new AmbiguousColumnSource(columnName, candidates);
620            return ResolutionResult.ambiguous(ambiguous);
621        }
622
623        // Additional check: even if we have definite candidates or allow inferred guessing,
624        // require at least one candidate to meet the minConfidenceToGuess threshold
625        double minConfToGuess = config != null ? config.getMinConfidenceToGuess() : 0.95;
626        if (highestConfidence < minConfToGuess && !allowGuessInferred) {
627            // No candidate has sufficient confidence for guessing
628            if (DEBUG_RESOLUTION) {
629                System.out.println("[DEBUG-RESOLVE]   AMBIGUOUS (confidence too low): " + columnName +
630                    " highest=" + highestConfidence + " required=" + minConfToGuess);
631            }
632            AmbiguousColumnSource ambiguous = new AmbiguousColumnSource(columnName, candidates);
633            return ResolutionResult.ambiguous(ambiguous);
634        }
635
636        // Multiple tables have the column - apply GUESS_COLUMN_STRATEGY
637        // Candidates are already sorted by table position (done earlier)
638        int strategy = getGuessColumnStrategy();
639
640        if (strategy == TSQLResolverConfig.GUESS_COLUMN_STRATEGY_NEAREST) {
641            // Pick the first candidate (nearest table in FROM clause order)
642            return ResolutionResult.exactMatch(candidates.get(0));
643        } else if (strategy == TSQLResolverConfig.GUESS_COLUMN_STRATEGY_FARTHEST) {
644            // Pick the last candidate (farthest table in FROM clause order)
645            return ResolutionResult.exactMatch(candidates.get(candidates.size() - 1));
646        }
647
648        // GUESS_COLUMN_STRATEGY_NOT_PICKUP: leave as ambiguous
649        if (DEBUG_RESOLUTION) {
650            System.out.println("[DEBUG-RESOLVE]   AMBIGUOUS (NOT_PICKUP strategy): " + columnName + " with " + candidates.size() + " candidates");
651        }
652        AmbiguousColumnSource ambiguous = new AmbiguousColumnSource(columnName, candidates);
653        return ResolutionResult.ambiguous(ambiguous);
654    }
655
656    /**
657     * Deduplicate matches by namespace identity.
658     * The same namespace can be found through different scope paths (e.g., CTE scope and SELECT scope)
659     * but it's still the same source - not truly ambiguous.
660     *
661     * @param matches All matches from scope resolution
662     * @return List of unique matches by namespace identity
663     */
664    private List<ResolvedImpl.Match> deduplicateMatchesByNamespace(List<ResolvedImpl.Match> matches) {
665        if (matches == null || matches.size() <= 1) {
666            return matches;
667        }
668
669        // Use identity-based set to track unique namespaces
670        java.util.IdentityHashMap<INamespace, ResolvedImpl.Match> uniqueByNamespace =
671            new java.util.IdentityHashMap<>();
672
673        for (ResolvedImpl.Match match : matches) {
674            if (match.namespace != null && !uniqueByNamespace.containsKey(match.namespace)) {
675                uniqueByNamespace.put(match.namespace, match);
676            }
677        }
678
679        return new ArrayList<>(uniqueByNamespace.values());
680    }
681
682    /**
683     * Sort candidates by their source table's position in the SQL text.
684     * This ensures that when GUESS_COLUMN_STRATEGY_NEAREST or FARTHEST is applied,
685     * the candidates are ordered according to their actual position in the FROM clause.
686     *
687     * Uses the table's start token (lineNo, columnNo) to determine position.
688     *
689     * @param candidates List of ColumnSource candidates to sort in place
690     */
691    private void sortCandidatesByTablePosition(List<ColumnSource> candidates) {
692        if (candidates == null || candidates.size() <= 1) {
693            return;
694        }
695
696        candidates.sort(new Comparator<ColumnSource>() {
697            @Override
698            public int compare(ColumnSource c1, ColumnSource c2) {
699                TTable t1 = c1.getFinalTable();
700                TTable t2 = c2.getFinalTable();
701
702                // If either table is null, maintain relative order
703                if (t1 == null && t2 == null) return 0;
704                if (t1 == null) return 1;  // null tables go to the end
705                if (t2 == null) return -1;
706
707                TSourceToken token1 = t1.getStartToken();
708                TSourceToken token2 = t2.getStartToken();
709
710                // If either token is null, maintain relative order
711                if (token1 == null && token2 == null) return 0;
712                if (token1 == null) return 1;
713                if (token2 == null) return -1;
714
715                // Compare by line number first
716                int lineCmp = Long.compare(token1.lineNo, token2.lineNo);
717                if (lineCmp != 0) {
718                    return lineCmp;
719                }
720
721                // If same line, compare by column number
722                return Long.compare(token1.columnNo, token2.columnNo);
723            }
724        });
725    }
726
727    /**
728     * Update TObjectName with the resolution result.
729     * Also registers with ResolutionContext.
730     *
731     * For ambiguous columns (multiple candidate tables), this also populates
732     * TObjectName.candidateTables with all possible source tables.
733     *
734     * IMPORTANT: When resolution fails (notFound) and the column's sourceTable
735     * was set during Phase 1 to an UNNEST table, we clear the sourceTable.
736     * This is because UNNEST tables have a fixed set of columns (implicit column,
737     * offset, struct fields) and should NOT have arbitrary columns inferred.
738     * Clearing sourceTable allows the formatter to treat these as "missed" columns.
739     */
740    private void updateObjectNameWithResult(TObjectName objName, ResolutionResult result) {
741        // Update TObjectName with resolution result
742        objName.setResolution(result);
743
744        // For notFound results, check if Phase 1 incorrectly linked to UNNEST table
745        // UNNEST tables have a fixed column set - don't allow inferred columns
746        if (!result.isExactMatch() && !result.isAmbiguous()) {
747            TTable currentSourceTable = objName.getSourceTable();
748            if (currentSourceTable != null &&
749                currentSourceTable.getTableType() == ETableSource.unnest) {
750                // Clear the incorrectly set sourceTable from Phase 1
751                // This column wasn't found in the UNNEST namespace, so it shouldn't
752                // be attributed to the UNNEST table
753                objName.setSourceTable(null);
754                if (DEBUG_RESOLUTION) {
755                    System.out.println("[DEBUG-RESOLVE]   Cleared incorrect UNNEST sourceTable for: " +
756                        objName + " (not found in UNNEST namespace)");
757                }
758            }
759        }
760
761        // For ambiguous results, populate candidateTables with all candidate tables
762        // IMPORTANT: Clear existing candidateTables first, as Phase 1 (linkColumnToTable)
763        // may have added candidates from incorrect scopes (e.g., MERGE target table for
764        // columns inside USING subquery). Phase 2 (NameResolver) has proper scope awareness
765        // and produces the authoritative candidate list.
766        if (result.isAmbiguous() && result.getAmbiguousSource() != null) {
767            AmbiguousColumnSource ambiguous = result.getAmbiguousSource();
768            // Clear Phase 1 candidates before adding Phase 2's scope-aware candidates
769            objName.getCandidateTables().clear();
770            for (ColumnSource candidate : ambiguous.getCandidates()) {
771                gudusoft.gsqlparser.nodes.TTable candidateTable = candidate.getFinalTable();
772                if (candidateTable != null) {
773                    objName.getCandidateTables().addTable(candidateTable);
774                }
775            }
776        }
777
778        // Register with context for global querying
779        context.registerResolution(objName, result);
780    }
781
782    /**
783     * Resolve a column within a specific namespace (for direct lookups).
784     */
785    public ResolutionResult resolveInNamespace(String columnName, INamespace namespace) {
786        if (columnName == null || namespace == null) {
787            return ResolutionResult.notFound("<null>");
788        }
789
790        ColumnSource source = namespace.resolveColumn(columnName);
791        if (source != null) {
792            return ResolutionResult.exactMatch(source);
793        }
794
795        return ResolutionResult.notFound(columnName);
796    }
797
798    /**
799     * Find all namespaces that contain a given column.
800     * Used for implementing full candidate collection in ambiguous scenarios.
801     */
802    public List<INamespace> findNamespacesWithColumn(String columnName, IScope scope) {
803        List<INamespace> result = new ArrayList<>();
804
805        for (INamespace ns : scope.getVisibleNamespaces()) {
806            if (ns.hasColumn(columnName) == ColumnLevel.EXISTS) {
807                result.add(ns);
808            }
809        }
810
811        return result;
812    }
813
814    public INameMatcher getNameMatcher() {
815        return nameMatcher;
816    }
817
818    public ResolutionContext getContext() {
819        return context;
820    }
821}