001package gudusoft.gsqlparser.resolver2;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TBaseType;
005import gudusoft.gsqlparser.resolver2.format.DisplayNameMode;
006import gudusoft.gsqlparser.resolver2.format.DisplayNamePolicy;
007import gudusoft.gsqlparser.resolver2.matcher.DefaultNameMatcher;
008import gudusoft.gsqlparser.resolver2.matcher.INameMatcher;
009import gudusoft.gsqlparser.resolver2.matcher.VendorNameMatcher;
010
011/**
012 * Configuration for TSQLResolver2.
013 * Controls various aspects of name resolution behavior.
014 */
015public class TSQLResolverConfig {
016
017    /** Name matcher for case sensitivity and matching rules */
018    private INameMatcher nameMatcher = new DefaultNameMatcher();
019
020    /** Database vendor for vendor-specific name matching */
021    private EDbVendor vendor = null;
022
023    /** Whether to enable legacy compatibility mode (sync results to TTable.linkedColumns) */
024    private boolean legacyCompatibilityEnabled = true;
025
026    /**
027     * Minimum confidence threshold for syncing to legacy structures.
028     *
029     * Recommended values:
030     * - 1.0 (default): Only sync definite results (safest, for SQL validation)
031     * - 0.7: Include high-confidence inferences (for data lineage analysis)
032     * - 0.5: Include all inferences (may mislead legacy code, not recommended)
033     *
034     * Use cases:
035     * - Data lineage tools may want 0.7 inference results
036     * - SQL formatters only need 1.0 definite results
037     */
038    private double legacySyncMinConfidence = 1.0;
039
040    /** Maximum iterations for iterative resolution */
041    private int maxIterations = 10;
042
043    /** Minimum progress rate to continue iteration (0.0 - 1.0) */
044    private double minProgressRate = 0.01;  // 1%
045
046    /** Number of stable passes required to declare convergence */
047    private int stablePassesForConvergence = 2;
048
049    /** Whether to collect full candidates for ambiguous columns */
050    private boolean collectFullCandidates = true;
051
052    /** Whether to enable legacy evidence collection (deprecated, use NamespaceEnhancer) */
053    private boolean evidenceCollectionEnabled = false;
054
055    /**
056     * Whether to show datatype information for columns from CREATE TABLE statements.
057     * When enabled, columns from CREATE TABLE will include datatype info in format:
058     * columnName:datatypeName:length or columnName:datatypeName:precision:scale
059     */
060    private boolean showDatatype = false;
061
062    /**
063     * Whether to show CTE (Common Table Expression) tables and their columns in output.
064     * When enabled, CTE tables are included in the tables list and CTE columns
065     * are included in the fields list with "(CTE)" suffix.
066     * Default is false for backward compatibility.
067     */
068    private boolean showCTE = false;
069
070    // ========== DISPLAY NAME Configuration ==========
071    // Controls how identifier names are rendered for output
072
073    /**
074     * Display name mode - controls how identifiers are rendered.
075     * <ul>
076     *   <li>DISPLAY: Strip delimiters, preserve original case (default, recommended for debugging)</li>
077     *   <li>SQL_RENDER: Preserve delimiters for valid SQL regeneration</li>
078     *   <li>CANONICAL: Apply vendor-specific case folding</li>
079     * </ul>
080     */
081    private DisplayNameMode displayNameMode = DisplayNameMode.DISPLAY;
082
083    /**
084     * Display name policy - controls which occurrence to use when same object
085     * appears multiple times with different spellings.
086     * <ul>
087     *   <li>PREFER_DEFINITION_SITE: Use spelling from definition (CTE def, CREATE TABLE, etc.)</li>
088     *   <li>PREFER_FIRST_OCCURRENCE: Use first occurrence in SQL text</li>
089     *   <li>PREFER_METADATA: Use spelling from database metadata</li>
090     * </ul>
091     */
092    private DisplayNamePolicy displayNamePolicy = DisplayNamePolicy.PREFER_DEFINITION_SITE;
093
094    /**
095     * Whether to strip delimiters (quotes, backticks, brackets) from identifier display.
096     * Only applies when displayNameMode is DISPLAY.
097     * Default is true.
098     */
099    private boolean stripDelimitersForDisplay = true;
100
101    // ========== GUESS_COLUMN_STRATEGY Configuration ==========
102    // Strategy for handling ambiguous columns (when a column could belong to multiple tables)
103
104    /** Pick the first candidate table (nearest in FROM clause order) */
105    public static final int GUESS_COLUMN_STRATEGY_NEAREST = TBaseType.GUESS_COLUMN_STRATEGY_NEAREST;
106
107    /** Pick the last candidate table (farthest in FROM clause order) */
108    public static final int GUESS_COLUMN_STRATEGY_FARTHEST = TBaseType.GUESS_COLUMN_STRATEGY_FARTHEST;
109
110    /** Do not pick any candidate, leave as unresolved/ambiguous */
111    public static final int GUESS_COLUMN_STRATEGY_NOT_PICKUP = TBaseType.GUESS_COLUMN_STRATEGY_NOT_PICKUP;
112
113    /** Human-readable names for strategy values */
114    public static final String[] GUESS_COLUMN_STRATEGY_NAMES = TBaseType.GUESS_COLUMN_STRATEGY_MSG;
115
116    /**
117     * Strategy for handling ambiguous columns.
118     * Default: reads from TBaseType.GUESS_COLUMN_STRATEGY for backward compatibility.
119     * Can be overridden per-config instance.
120     */
121    private Integer guessColumnStrategy = null; // null means use TBaseType default
122
123    // ========== CONFIDENCE THRESHOLD Configuration ==========
124    // Controls when resolutions are considered "definite" vs "inferred" and when guessing is allowed
125
126    /**
127     * Minimum confidence threshold for a resolution to be considered "definite".
128     *
129     * <p>Resolutions with confidence >= this threshold are treated as having
130     * strong evidence (e.g., DDL metadata, qualified references). Resolutions
131     * below this threshold are considered "inferred" or "uncertain".</p>
132     *
133     * <p>Recommended values:</p>
134     * <ul>
135     *   <li>0.9 (default): High bar - only high-confidence resolutions are definite</li>
136     *   <li>0.7: Include more inferred resolutions as definite</li>
137     *   <li>1.0: Only metadata-backed resolutions are definite</li>
138     * </ul>
139     */
140    private double minDefiniteConfidence = 0.9;
141
142    /**
143     * Minimum confidence threshold to allow guessing from ambiguous candidates.
144     *
145     * <p>When multiple candidates exist and GUESS_COLUMN_STRATEGY is not NOT_PICKUP,
146     * at least one candidate must have confidence >= this threshold for guessing
147     * to be allowed. If all candidates are below this threshold, the column
148     * remains AMBIGUOUS regardless of strategy.</p>
149     *
150     * <p>Recommended values:</p>
151     * <ul>
152     *   <li>0.95 (default): Very high bar - only guess with strong evidence</li>
153     *   <li>0.9: Allow guessing with high-confidence candidates</li>
154     *   <li>0.7: Allow guessing with inferred candidates (not recommended)</li>
155     * </ul>
156     */
157    private double minConfidenceToGuess = 0.95;
158
159    /**
160     * Whether to allow guessing when all candidates are inferred (no DDL metadata).
161     *
162     * <p>When false (default), if all candidates have confidence below
163     * {@link #minDefiniteConfidence}, the column remains AMBIGUOUS even if
164     * GUESS_COLUMN_STRATEGY would normally pick one. This prevents guessing
165     * based on uncertain evidence.</p>
166     *
167     * <p>When true, guessing is allowed even when all candidates are inferred,
168     * as long as the GUESS_COLUMN_STRATEGY is not NOT_PICKUP. Use with caution
169     * as this may produce misleading lineage results.</p>
170     */
171    private boolean allowGuessWhenAllInferred = false;
172
173    public TSQLResolverConfig() {
174        // Default configuration
175    }
176
177    public INameMatcher getNameMatcher() {
178        return nameMatcher;
179    }
180
181    public void setNameMatcher(INameMatcher nameMatcher) {
182        if (nameMatcher == null) {
183            throw new IllegalArgumentException("Name matcher cannot be null");
184        }
185        this.nameMatcher = nameMatcher;
186    }
187
188    public boolean isLegacyCompatibilityEnabled() {
189        return legacyCompatibilityEnabled;
190    }
191
192    public void setLegacyCompatibilityEnabled(boolean enabled) {
193        this.legacyCompatibilityEnabled = enabled;
194    }
195
196    public double getLegacySyncMinConfidence() {
197        return legacySyncMinConfidence;
198    }
199
200    public void setLegacySyncMinConfidence(double threshold) {
201        if (threshold < 0.0 || threshold > 1.0) {
202            throw new IllegalArgumentException("Confidence threshold must be in [0.0, 1.0]");
203        }
204        this.legacySyncMinConfidence = threshold;
205    }
206
207    public int getMaxIterations() {
208        return maxIterations;
209    }
210
211    public void setMaxIterations(int maxIterations) {
212        if (maxIterations < 1) {
213            throw new IllegalArgumentException("Max iterations must be at least 1");
214        }
215        this.maxIterations = maxIterations;
216    }
217
218    public double getMinProgressRate() {
219        return minProgressRate;
220    }
221
222    public void setMinProgressRate(double minProgressRate) {
223        if (minProgressRate < 0.0 || minProgressRate > 1.0) {
224            throw new IllegalArgumentException("Progress rate must be in [0.0, 1.0]");
225        }
226        this.minProgressRate = minProgressRate;
227    }
228
229    public int getStablePassesForConvergence() {
230        return stablePassesForConvergence;
231    }
232
233    public void setStablePassesForConvergence(int stablePasses) {
234        if (stablePasses < 1) {
235            throw new IllegalArgumentException("Stable passes must be at least 1");
236        }
237        this.stablePassesForConvergence = stablePasses;
238    }
239
240    public boolean isCollectFullCandidates() {
241        return collectFullCandidates;
242    }
243
244    public void setCollectFullCandidates(boolean collectFullCandidates) {
245        this.collectFullCandidates = collectFullCandidates;
246    }
247
248    /**
249     * @deprecated Use NamespaceEnhancer instead
250     */
251    public boolean isEvidenceCollectionEnabled() {
252        return evidenceCollectionEnabled;
253    }
254
255    /**
256     * @deprecated Use NamespaceEnhancer instead
257     */
258    public void setEvidenceCollectionEnabled(boolean enabled) {
259        this.evidenceCollectionEnabled = enabled;
260    }
261
262    /**
263     * Check if datatype information should be shown for columns from CREATE TABLE statements.
264     *
265     * @return true if datatype information should be included in column names
266     */
267    public boolean isShowDatatype() {
268        return showDatatype;
269    }
270
271    /**
272     * Set whether to show datatype information for columns from CREATE TABLE statements.
273     * When enabled, columns from CREATE TABLE will include datatype info in format:
274     * columnName:datatypeName:length or columnName:datatypeName:precision:scale
275     *
276     * @param showDatatype true to include datatype information
277     */
278    public void setShowDatatype(boolean showDatatype) {
279        this.showDatatype = showDatatype;
280    }
281
282    /**
283     * Check if CTE (Common Table Expression) tables and columns should be shown in output.
284     *
285     * @return true if CTE tables and columns should be included
286     */
287    public boolean isShowCTE() {
288        return showCTE;
289    }
290
291    /**
292     * Set whether to show CTE (Common Table Expression) tables and columns in output.
293     * When enabled, CTE tables are included in the tables list and CTE columns
294     * are included in the fields list with "(CTE)" suffix.
295     *
296     * @param showCTE true to include CTE tables and columns
297     */
298    public void setShowCTE(boolean showCTE) {
299        this.showCTE = showCTE;
300    }
301
302    // ========== Display Name Getters and Setters ==========
303
304    /**
305     * Get the display name mode.
306     *
307     * @return the current display name mode
308     */
309    public DisplayNameMode getDisplayNameMode() {
310        return displayNameMode;
311    }
312
313    /**
314     * Set the display name mode.
315     *
316     * @param mode the display name mode
317     */
318    public void setDisplayNameMode(DisplayNameMode mode) {
319        this.displayNameMode = mode != null ? mode : DisplayNameMode.DISPLAY;
320    }
321
322    /**
323     * Get the display name policy.
324     *
325     * @return the current display name policy
326     */
327    public DisplayNamePolicy getDisplayNamePolicy() {
328        return displayNamePolicy;
329    }
330
331    /**
332     * Set the display name policy.
333     *
334     * @param policy the display name policy
335     */
336    public void setDisplayNamePolicy(DisplayNamePolicy policy) {
337        this.displayNamePolicy = policy != null ? policy : DisplayNamePolicy.PREFER_DEFINITION_SITE;
338    }
339
340    /**
341     * Check if delimiters should be stripped for display.
342     *
343     * @return true if delimiters should be stripped
344     */
345    public boolean isStripDelimitersForDisplay() {
346        return stripDelimitersForDisplay;
347    }
348
349    /**
350     * Set whether to strip delimiters for display.
351     *
352     * @param stripDelimiters true to strip delimiters
353     */
354    public void setStripDelimitersForDisplay(boolean stripDelimiters) {
355        this.stripDelimitersForDisplay = stripDelimiters;
356    }
357
358    /**
359     * Get the strategy for handling ambiguous columns.
360     * Returns the configured value, or TBaseType.GUESS_COLUMN_STRATEGY if not set.
361     *
362     * @return One of GUESS_COLUMN_STRATEGY_NEAREST, GUESS_COLUMN_STRATEGY_FARTHEST,
363     *         or GUESS_COLUMN_STRATEGY_NOT_PICKUP
364     */
365    public int getGuessColumnStrategy() {
366        return guessColumnStrategy != null ? guessColumnStrategy : TBaseType.GUESS_COLUMN_STRATEGY;
367    }
368
369    /**
370     * Set the strategy for handling ambiguous columns.
371     *
372     * @param strategy One of GUESS_COLUMN_STRATEGY_NEAREST, GUESS_COLUMN_STRATEGY_FARTHEST,
373     *                 or GUESS_COLUMN_STRATEGY_NOT_PICKUP
374     */
375    public void setGuessColumnStrategy(int strategy) {
376        if (strategy < GUESS_COLUMN_STRATEGY_NEAREST || strategy > GUESS_COLUMN_STRATEGY_NOT_PICKUP) {
377            throw new IllegalArgumentException("Invalid strategy: " + strategy +
378                ". Must be GUESS_COLUMN_STRATEGY_NEAREST (0), GUESS_COLUMN_STRATEGY_FARTHEST (1), " +
379                "or GUESS_COLUMN_STRATEGY_NOT_PICKUP (2)");
380        }
381        this.guessColumnStrategy = strategy;
382    }
383
384    /**
385     * Check if a custom guess column strategy has been set on this config.
386     * If false, the strategy from TBaseType.GUESS_COLUMN_STRATEGY will be used.
387     *
388     * @return true if a custom strategy is set
389     */
390    public boolean hasCustomGuessColumnStrategy() {
391        return guessColumnStrategy != null;
392    }
393
394    /**
395     * Clear any custom guess column strategy, reverting to TBaseType.GUESS_COLUMN_STRATEGY.
396     */
397    public void clearGuessColumnStrategy() {
398        this.guessColumnStrategy = null;
399    }
400
401    /**
402     * Get the human-readable name for the current strategy.
403     *
404     * @return Strategy name (e.g., "GUESS_COLUMN_STRATEGY_NEAREST")
405     */
406    public String getGuessColumnStrategyName() {
407        int strategy = getGuessColumnStrategy();
408        if (strategy >= 0 && strategy < GUESS_COLUMN_STRATEGY_NAMES.length) {
409            return GUESS_COLUMN_STRATEGY_NAMES[strategy];
410        }
411        return "UNKNOWN(" + strategy + ")";
412    }
413
414    // ========== Confidence Threshold Getters and Setters ==========
415
416    /**
417     * Get the minimum confidence threshold for definite resolutions.
418     *
419     * @return Threshold value [0.0, 1.0]
420     */
421    public double getMinDefiniteConfidence() {
422        return minDefiniteConfidence;
423    }
424
425    /**
426     * Set the minimum confidence threshold for definite resolutions.
427     *
428     * @param threshold Threshold value [0.0, 1.0]
429     */
430    public void setMinDefiniteConfidence(double threshold) {
431        if (threshold < 0.0 || threshold > 1.0) {
432            throw new IllegalArgumentException("Confidence threshold must be in [0.0, 1.0]");
433        }
434        this.minDefiniteConfidence = threshold;
435    }
436
437    /**
438     * Get the minimum confidence threshold to allow guessing.
439     *
440     * @return Threshold value [0.0, 1.0]
441     */
442    public double getMinConfidenceToGuess() {
443        return minConfidenceToGuess;
444    }
445
446    /**
447     * Set the minimum confidence threshold to allow guessing.
448     *
449     * @param threshold Threshold value [0.0, 1.0]
450     */
451    public void setMinConfidenceToGuess(double threshold) {
452        if (threshold < 0.0 || threshold > 1.0) {
453            throw new IllegalArgumentException("Confidence threshold must be in [0.0, 1.0]");
454        }
455        this.minConfidenceToGuess = threshold;
456    }
457
458    /**
459     * Check if guessing is allowed when all candidates are inferred.
460     *
461     * @return true if guessing is allowed with inferred candidates
462     */
463    public boolean isAllowGuessWhenAllInferred() {
464        return allowGuessWhenAllInferred;
465    }
466
467    /**
468     * Set whether to allow guessing when all candidates are inferred.
469     *
470     * @param allow true to allow guessing with inferred candidates
471     */
472    public void setAllowGuessWhenAllInferred(boolean allow) {
473        this.allowGuessWhenAllInferred = allow;
474    }
475
476    /**
477     * Check if a confidence value represents a definite resolution.
478     *
479     * @param confidence The confidence value to check
480     * @return true if the confidence is >= minDefiniteConfidence
481     */
482    public boolean isDefiniteConfidence(double confidence) {
483        return confidence >= minDefiniteConfidence;
484    }
485
486    /**
487     * Check if a confidence value is sufficient to allow guessing.
488     *
489     * @param confidence The confidence value to check
490     * @return true if the confidence is >= minConfidenceToGuess
491     */
492    public boolean canGuessWithConfidence(double confidence) {
493        return confidence >= minConfidenceToGuess;
494    }
495
496    /**
497     * Create a default configuration
498     */
499    public static TSQLResolverConfig createDefault() {
500        return new TSQLResolverConfig();
501    }
502
503    /**
504     * Create configuration for case-sensitive matching
505     */
506    public static TSQLResolverConfig createCaseSensitive() {
507        TSQLResolverConfig config = new TSQLResolverConfig();
508        config.setNameMatcher(new DefaultNameMatcher(true));
509        return config;
510    }
511
512    /**
513     * Create configuration for standalone mode (no legacy sync)
514     */
515    public static TSQLResolverConfig createStandalone() {
516        TSQLResolverConfig config = new TSQLResolverConfig();
517        config.setLegacyCompatibilityEnabled(false);
518        return config;
519    }
520
521    /**
522     * Create configuration with showDatatype enabled.
523     * This configuration includes datatype information for columns from CREATE TABLE statements.
524     */
525    public static TSQLResolverConfig createWithDatatype() {
526        TSQLResolverConfig config = new TSQLResolverConfig();
527        config.setShowDatatype(true);
528        return config;
529    }
530
531    /**
532     * Create configuration with showCTE enabled.
533     * This configuration includes CTE tables and columns in the output.
534     */
535    public static TSQLResolverConfig createWithCTE() {
536        TSQLResolverConfig config = new TSQLResolverConfig();
537        config.setShowCTE(true);
538        return config;
539    }
540
541    /**
542     * Create configuration for a specific database vendor.
543     *
544     * <p>This factory method creates a configuration with vendor-specific
545     * name matching rules. The VendorNameMatcher uses IdentifierService
546     * to properly handle case sensitivity and quote handling for each vendor.</p>
547     *
548     * <p>Example vendor behaviors:</p>
549     * <ul>
550     *   <li>Oracle: Unquoted identifiers fold to UPPER, quoted are case-sensitive</li>
551     *   <li>PostgreSQL: Unquoted identifiers fold to LOWER, quoted are case-sensitive</li>
552     *   <li>MySQL: Depends on lower_case_table_names setting</li>
553     *   <li>BigQuery: Table names are case-sensitive, column names are case-insensitive</li>
554     * </ul>
555     *
556     * @param vendor the database vendor
557     * @return configuration with vendor-specific name matcher
558     */
559    public static TSQLResolverConfig createForVendor(EDbVendor vendor) {
560        TSQLResolverConfig config = new TSQLResolverConfig();
561        config.vendor = vendor;
562        config.nameMatcher = new VendorNameMatcher(vendor);
563        return config;
564    }
565
566    /**
567     * Create configuration for a specific database vendor with datatype display enabled.
568     *
569     * @param vendor the database vendor
570     * @return configuration with vendor-specific name matcher and datatype display
571     */
572    public static TSQLResolverConfig createForVendorWithDatatype(EDbVendor vendor) {
573        TSQLResolverConfig config = createForVendor(vendor);
574        config.setShowDatatype(true);
575        return config;
576    }
577
578    /**
579     * Get the database vendor, if set.
580     *
581     * @return the database vendor, or null if not set
582     */
583    public EDbVendor getVendor() {
584        return vendor;
585    }
586
587    /**
588     * Set the database vendor and update name matcher accordingly.
589     *
590     * @param vendor the database vendor
591     */
592    public void setVendor(EDbVendor vendor) {
593        this.vendor = vendor;
594        if (vendor != null) {
595            this.nameMatcher = new VendorNameMatcher(vendor);
596        }
597    }
598
599    /**
600     * Check if vendor-specific name matching is enabled.
601     *
602     * @return true if a vendor is configured
603     */
604    public boolean hasVendor() {
605        return vendor != null;
606    }
607
608    @Override
609    public String toString() {
610        return String.format(
611            "TSQLResolverConfig{vendor=%s, nameMatcher=%s, legacyCompat=%s, minConfidence=%.2f, maxIter=%d, guessStrategy=%s, " +
612            "minDefiniteConf=%.2f, minGuessConf=%.2f, allowGuessInferred=%s, showDatatype=%s, showCTE=%s, displayMode=%s, displayPolicy=%s}",
613            vendor,
614            nameMatcher,
615            legacyCompatibilityEnabled,
616            legacySyncMinConfidence,
617            maxIterations,
618            getGuessColumnStrategyName(),
619            minDefiniteConfidence,
620            minConfidenceToGuess,
621            allowGuessWhenAllInferred,
622            showDatatype,
623            showCTE,
624            displayNameMode,
625            displayNamePolicy
626        );
627    }
628}