001package gudusoft.gsqlparser.resolver2; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TBaseType; 005import gudusoft.gsqlparser.resolver2.format.DisplayNameMode; 006import gudusoft.gsqlparser.resolver2.format.DisplayNamePolicy; 007import gudusoft.gsqlparser.resolver2.matcher.DefaultNameMatcher; 008import gudusoft.gsqlparser.resolver2.matcher.INameMatcher; 009import gudusoft.gsqlparser.resolver2.matcher.VendorNameMatcher; 010 011/** 012 * Configuration for TSQLResolver2. 013 * Controls various aspects of name resolution behavior. 014 */ 015public class TSQLResolverConfig { 016 017 /** Name matcher for case sensitivity and matching rules */ 018 private INameMatcher nameMatcher = new DefaultNameMatcher(); 019 020 /** Database vendor for vendor-specific name matching */ 021 private EDbVendor vendor = null; 022 023 /** Whether to enable legacy compatibility mode (sync results to TTable.linkedColumns) */ 024 private boolean legacyCompatibilityEnabled = true; 025 026 /** 027 * Minimum confidence threshold for syncing to legacy structures. 028 * 029 * Recommended values: 030 * - 1.0 (default): Only sync definite results (safest, for SQL validation) 031 * - 0.7: Include high-confidence inferences (for data lineage analysis) 032 * - 0.5: Include all inferences (may mislead legacy code, not recommended) 033 * 034 * Use cases: 035 * - Data lineage tools may want 0.7 inference results 036 * - SQL formatters only need 1.0 definite results 037 */ 038 private double legacySyncMinConfidence = 1.0; 039 040 /** Maximum iterations for iterative resolution */ 041 private int maxIterations = 10; 042 043 /** Minimum progress rate to continue iteration (0.0 - 1.0) */ 044 private double minProgressRate = 0.01; // 1% 045 046 /** Number of stable passes required to declare convergence */ 047 private int stablePassesForConvergence = 2; 048 049 /** Whether to collect full candidates for ambiguous columns */ 050 private boolean collectFullCandidates = true; 051 052 /** Whether to enable legacy evidence collection (deprecated, use NamespaceEnhancer) */ 053 private boolean evidenceCollectionEnabled = false; 054 055 /** 056 * Whether to show datatype information for columns from CREATE TABLE statements. 057 * When enabled, columns from CREATE TABLE will include datatype info in format: 058 * columnName:datatypeName:length or columnName:datatypeName:precision:scale 059 */ 060 private boolean showDatatype = false; 061 062 /** 063 * Whether to show CTE (Common Table Expression) tables and their columns in output. 064 * When enabled, CTE tables are included in the tables list and CTE columns 065 * are included in the fields list with "(CTE)" suffix. 066 * Default is false for backward compatibility. 067 */ 068 private boolean showCTE = false; 069 070 // ========== DISPLAY NAME Configuration ========== 071 // Controls how identifier names are rendered for output 072 073 /** 074 * Display name mode - controls how identifiers are rendered. 075 * <ul> 076 * <li>DISPLAY: Strip delimiters, preserve original case (default, recommended for debugging)</li> 077 * <li>SQL_RENDER: Preserve delimiters for valid SQL regeneration</li> 078 * <li>CANONICAL: Apply vendor-specific case folding</li> 079 * </ul> 080 */ 081 private DisplayNameMode displayNameMode = DisplayNameMode.DISPLAY; 082 083 /** 084 * Display name policy - controls which occurrence to use when same object 085 * appears multiple times with different spellings. 086 * <ul> 087 * <li>PREFER_DEFINITION_SITE: Use spelling from definition (CTE def, CREATE TABLE, etc.)</li> 088 * <li>PREFER_FIRST_OCCURRENCE: Use first occurrence in SQL text</li> 089 * <li>PREFER_METADATA: Use spelling from database metadata</li> 090 * </ul> 091 */ 092 private DisplayNamePolicy displayNamePolicy = DisplayNamePolicy.PREFER_DEFINITION_SITE; 093 094 /** 095 * Whether to strip delimiters (quotes, backticks, brackets) from identifier display. 096 * Only applies when displayNameMode is DISPLAY. 097 * Default is true. 098 */ 099 private boolean stripDelimitersForDisplay = true; 100 101 // ========== GUESS_COLUMN_STRATEGY Configuration ========== 102 // Strategy for handling ambiguous columns (when a column could belong to multiple tables) 103 104 /** Pick the first candidate table (nearest in FROM clause order) */ 105 public static final int GUESS_COLUMN_STRATEGY_NEAREST = TBaseType.GUESS_COLUMN_STRATEGY_NEAREST; 106 107 /** Pick the last candidate table (farthest in FROM clause order) */ 108 public static final int GUESS_COLUMN_STRATEGY_FARTHEST = TBaseType.GUESS_COLUMN_STRATEGY_FARTHEST; 109 110 /** Do not pick any candidate, leave as unresolved/ambiguous */ 111 public static final int GUESS_COLUMN_STRATEGY_NOT_PICKUP = TBaseType.GUESS_COLUMN_STRATEGY_NOT_PICKUP; 112 113 /** Human-readable names for strategy values */ 114 public static final String[] GUESS_COLUMN_STRATEGY_NAMES = TBaseType.GUESS_COLUMN_STRATEGY_MSG; 115 116 /** 117 * Strategy for handling ambiguous columns. 118 * Default: reads from TBaseType.GUESS_COLUMN_STRATEGY for backward compatibility. 119 * Can be overridden per-config instance. 120 */ 121 private Integer guessColumnStrategy = null; // null means use TBaseType default 122 123 // ========== CONFIDENCE THRESHOLD Configuration ========== 124 // Controls when resolutions are considered "definite" vs "inferred" and when guessing is allowed 125 126 /** 127 * Minimum confidence threshold for a resolution to be considered "definite". 128 * 129 * <p>Resolutions with confidence >= this threshold are treated as having 130 * strong evidence (e.g., DDL metadata, qualified references). Resolutions 131 * below this threshold are considered "inferred" or "uncertain".</p> 132 * 133 * <p>Recommended values:</p> 134 * <ul> 135 * <li>0.9 (default): High bar - only high-confidence resolutions are definite</li> 136 * <li>0.7: Include more inferred resolutions as definite</li> 137 * <li>1.0: Only metadata-backed resolutions are definite</li> 138 * </ul> 139 */ 140 private double minDefiniteConfidence = 0.9; 141 142 /** 143 * Minimum confidence threshold to allow guessing from ambiguous candidates. 144 * 145 * <p>When multiple candidates exist and GUESS_COLUMN_STRATEGY is not NOT_PICKUP, 146 * at least one candidate must have confidence >= this threshold for guessing 147 * to be allowed. If all candidates are below this threshold, the column 148 * remains AMBIGUOUS regardless of strategy.</p> 149 * 150 * <p>Recommended values:</p> 151 * <ul> 152 * <li>0.95 (default): Very high bar - only guess with strong evidence</li> 153 * <li>0.9: Allow guessing with high-confidence candidates</li> 154 * <li>0.7: Allow guessing with inferred candidates (not recommended)</li> 155 * </ul> 156 */ 157 private double minConfidenceToGuess = 0.95; 158 159 /** 160 * Whether to allow guessing when all candidates are inferred (no DDL metadata). 161 * 162 * <p>When false (default), if all candidates have confidence below 163 * {@link #minDefiniteConfidence}, the column remains AMBIGUOUS even if 164 * GUESS_COLUMN_STRATEGY would normally pick one. This prevents guessing 165 * based on uncertain evidence.</p> 166 * 167 * <p>When true, guessing is allowed even when all candidates are inferred, 168 * as long as the GUESS_COLUMN_STRATEGY is not NOT_PICKUP. Use with caution 169 * as this may produce misleading lineage results.</p> 170 */ 171 private boolean allowGuessWhenAllInferred = false; 172 173 public TSQLResolverConfig() { 174 // Default configuration 175 } 176 177 public INameMatcher getNameMatcher() { 178 return nameMatcher; 179 } 180 181 public void setNameMatcher(INameMatcher nameMatcher) { 182 if (nameMatcher == null) { 183 throw new IllegalArgumentException("Name matcher cannot be null"); 184 } 185 this.nameMatcher = nameMatcher; 186 } 187 188 public boolean isLegacyCompatibilityEnabled() { 189 return legacyCompatibilityEnabled; 190 } 191 192 public void setLegacyCompatibilityEnabled(boolean enabled) { 193 this.legacyCompatibilityEnabled = enabled; 194 } 195 196 public double getLegacySyncMinConfidence() { 197 return legacySyncMinConfidence; 198 } 199 200 public void setLegacySyncMinConfidence(double threshold) { 201 if (threshold < 0.0 || threshold > 1.0) { 202 throw new IllegalArgumentException("Confidence threshold must be in [0.0, 1.0]"); 203 } 204 this.legacySyncMinConfidence = threshold; 205 } 206 207 public int getMaxIterations() { 208 return maxIterations; 209 } 210 211 public void setMaxIterations(int maxIterations) { 212 if (maxIterations < 1) { 213 throw new IllegalArgumentException("Max iterations must be at least 1"); 214 } 215 this.maxIterations = maxIterations; 216 } 217 218 public double getMinProgressRate() { 219 return minProgressRate; 220 } 221 222 public void setMinProgressRate(double minProgressRate) { 223 if (minProgressRate < 0.0 || minProgressRate > 1.0) { 224 throw new IllegalArgumentException("Progress rate must be in [0.0, 1.0]"); 225 } 226 this.minProgressRate = minProgressRate; 227 } 228 229 public int getStablePassesForConvergence() { 230 return stablePassesForConvergence; 231 } 232 233 public void setStablePassesForConvergence(int stablePasses) { 234 if (stablePasses < 1) { 235 throw new IllegalArgumentException("Stable passes must be at least 1"); 236 } 237 this.stablePassesForConvergence = stablePasses; 238 } 239 240 public boolean isCollectFullCandidates() { 241 return collectFullCandidates; 242 } 243 244 public void setCollectFullCandidates(boolean collectFullCandidates) { 245 this.collectFullCandidates = collectFullCandidates; 246 } 247 248 /** 249 * @deprecated Use NamespaceEnhancer instead 250 */ 251 public boolean isEvidenceCollectionEnabled() { 252 return evidenceCollectionEnabled; 253 } 254 255 /** 256 * @deprecated Use NamespaceEnhancer instead 257 */ 258 public void setEvidenceCollectionEnabled(boolean enabled) { 259 this.evidenceCollectionEnabled = enabled; 260 } 261 262 /** 263 * Check if datatype information should be shown for columns from CREATE TABLE statements. 264 * 265 * @return true if datatype information should be included in column names 266 */ 267 public boolean isShowDatatype() { 268 return showDatatype; 269 } 270 271 /** 272 * Set whether to show datatype information for columns from CREATE TABLE statements. 273 * When enabled, columns from CREATE TABLE will include datatype info in format: 274 * columnName:datatypeName:length or columnName:datatypeName:precision:scale 275 * 276 * @param showDatatype true to include datatype information 277 */ 278 public void setShowDatatype(boolean showDatatype) { 279 this.showDatatype = showDatatype; 280 } 281 282 /** 283 * Check if CTE (Common Table Expression) tables and columns should be shown in output. 284 * 285 * @return true if CTE tables and columns should be included 286 */ 287 public boolean isShowCTE() { 288 return showCTE; 289 } 290 291 /** 292 * Set whether to show CTE (Common Table Expression) tables and columns in output. 293 * When enabled, CTE tables are included in the tables list and CTE columns 294 * are included in the fields list with "(CTE)" suffix. 295 * 296 * @param showCTE true to include CTE tables and columns 297 */ 298 public void setShowCTE(boolean showCTE) { 299 this.showCTE = showCTE; 300 } 301 302 // ========== Display Name Getters and Setters ========== 303 304 /** 305 * Get the display name mode. 306 * 307 * @return the current display name mode 308 */ 309 public DisplayNameMode getDisplayNameMode() { 310 return displayNameMode; 311 } 312 313 /** 314 * Set the display name mode. 315 * 316 * @param mode the display name mode 317 */ 318 public void setDisplayNameMode(DisplayNameMode mode) { 319 this.displayNameMode = mode != null ? mode : DisplayNameMode.DISPLAY; 320 } 321 322 /** 323 * Get the display name policy. 324 * 325 * @return the current display name policy 326 */ 327 public DisplayNamePolicy getDisplayNamePolicy() { 328 return displayNamePolicy; 329 } 330 331 /** 332 * Set the display name policy. 333 * 334 * @param policy the display name policy 335 */ 336 public void setDisplayNamePolicy(DisplayNamePolicy policy) { 337 this.displayNamePolicy = policy != null ? policy : DisplayNamePolicy.PREFER_DEFINITION_SITE; 338 } 339 340 /** 341 * Check if delimiters should be stripped for display. 342 * 343 * @return true if delimiters should be stripped 344 */ 345 public boolean isStripDelimitersForDisplay() { 346 return stripDelimitersForDisplay; 347 } 348 349 /** 350 * Set whether to strip delimiters for display. 351 * 352 * @param stripDelimiters true to strip delimiters 353 */ 354 public void setStripDelimitersForDisplay(boolean stripDelimiters) { 355 this.stripDelimitersForDisplay = stripDelimiters; 356 } 357 358 /** 359 * Get the strategy for handling ambiguous columns. 360 * Returns the configured value, or TBaseType.GUESS_COLUMN_STRATEGY if not set. 361 * 362 * @return One of GUESS_COLUMN_STRATEGY_NEAREST, GUESS_COLUMN_STRATEGY_FARTHEST, 363 * or GUESS_COLUMN_STRATEGY_NOT_PICKUP 364 */ 365 public int getGuessColumnStrategy() { 366 return guessColumnStrategy != null ? guessColumnStrategy : TBaseType.GUESS_COLUMN_STRATEGY; 367 } 368 369 /** 370 * Set the strategy for handling ambiguous columns. 371 * 372 * @param strategy One of GUESS_COLUMN_STRATEGY_NEAREST, GUESS_COLUMN_STRATEGY_FARTHEST, 373 * or GUESS_COLUMN_STRATEGY_NOT_PICKUP 374 */ 375 public void setGuessColumnStrategy(int strategy) { 376 if (strategy < GUESS_COLUMN_STRATEGY_NEAREST || strategy > GUESS_COLUMN_STRATEGY_NOT_PICKUP) { 377 throw new IllegalArgumentException("Invalid strategy: " + strategy + 378 ". Must be GUESS_COLUMN_STRATEGY_NEAREST (0), GUESS_COLUMN_STRATEGY_FARTHEST (1), " + 379 "or GUESS_COLUMN_STRATEGY_NOT_PICKUP (2)"); 380 } 381 this.guessColumnStrategy = strategy; 382 } 383 384 /** 385 * Check if a custom guess column strategy has been set on this config. 386 * If false, the strategy from TBaseType.GUESS_COLUMN_STRATEGY will be used. 387 * 388 * @return true if a custom strategy is set 389 */ 390 public boolean hasCustomGuessColumnStrategy() { 391 return guessColumnStrategy != null; 392 } 393 394 /** 395 * Clear any custom guess column strategy, reverting to TBaseType.GUESS_COLUMN_STRATEGY. 396 */ 397 public void clearGuessColumnStrategy() { 398 this.guessColumnStrategy = null; 399 } 400 401 /** 402 * Get the human-readable name for the current strategy. 403 * 404 * @return Strategy name (e.g., "GUESS_COLUMN_STRATEGY_NEAREST") 405 */ 406 public String getGuessColumnStrategyName() { 407 int strategy = getGuessColumnStrategy(); 408 if (strategy >= 0 && strategy < GUESS_COLUMN_STRATEGY_NAMES.length) { 409 return GUESS_COLUMN_STRATEGY_NAMES[strategy]; 410 } 411 return "UNKNOWN(" + strategy + ")"; 412 } 413 414 // ========== Confidence Threshold Getters and Setters ========== 415 416 /** 417 * Get the minimum confidence threshold for definite resolutions. 418 * 419 * @return Threshold value [0.0, 1.0] 420 */ 421 public double getMinDefiniteConfidence() { 422 return minDefiniteConfidence; 423 } 424 425 /** 426 * Set the minimum confidence threshold for definite resolutions. 427 * 428 * @param threshold Threshold value [0.0, 1.0] 429 */ 430 public void setMinDefiniteConfidence(double threshold) { 431 if (threshold < 0.0 || threshold > 1.0) { 432 throw new IllegalArgumentException("Confidence threshold must be in [0.0, 1.0]"); 433 } 434 this.minDefiniteConfidence = threshold; 435 } 436 437 /** 438 * Get the minimum confidence threshold to allow guessing. 439 * 440 * @return Threshold value [0.0, 1.0] 441 */ 442 public double getMinConfidenceToGuess() { 443 return minConfidenceToGuess; 444 } 445 446 /** 447 * Set the minimum confidence threshold to allow guessing. 448 * 449 * @param threshold Threshold value [0.0, 1.0] 450 */ 451 public void setMinConfidenceToGuess(double threshold) { 452 if (threshold < 0.0 || threshold > 1.0) { 453 throw new IllegalArgumentException("Confidence threshold must be in [0.0, 1.0]"); 454 } 455 this.minConfidenceToGuess = threshold; 456 } 457 458 /** 459 * Check if guessing is allowed when all candidates are inferred. 460 * 461 * @return true if guessing is allowed with inferred candidates 462 */ 463 public boolean isAllowGuessWhenAllInferred() { 464 return allowGuessWhenAllInferred; 465 } 466 467 /** 468 * Set whether to allow guessing when all candidates are inferred. 469 * 470 * @param allow true to allow guessing with inferred candidates 471 */ 472 public void setAllowGuessWhenAllInferred(boolean allow) { 473 this.allowGuessWhenAllInferred = allow; 474 } 475 476 /** 477 * Check if a confidence value represents a definite resolution. 478 * 479 * @param confidence The confidence value to check 480 * @return true if the confidence is >= minDefiniteConfidence 481 */ 482 public boolean isDefiniteConfidence(double confidence) { 483 return confidence >= minDefiniteConfidence; 484 } 485 486 /** 487 * Check if a confidence value is sufficient to allow guessing. 488 * 489 * @param confidence The confidence value to check 490 * @return true if the confidence is >= minConfidenceToGuess 491 */ 492 public boolean canGuessWithConfidence(double confidence) { 493 return confidence >= minConfidenceToGuess; 494 } 495 496 /** 497 * Create a default configuration 498 */ 499 public static TSQLResolverConfig createDefault() { 500 return new TSQLResolverConfig(); 501 } 502 503 /** 504 * Create configuration for case-sensitive matching 505 */ 506 public static TSQLResolverConfig createCaseSensitive() { 507 TSQLResolverConfig config = new TSQLResolverConfig(); 508 config.setNameMatcher(new DefaultNameMatcher(true)); 509 return config; 510 } 511 512 /** 513 * Create configuration for standalone mode (no legacy sync) 514 */ 515 public static TSQLResolverConfig createStandalone() { 516 TSQLResolverConfig config = new TSQLResolverConfig(); 517 config.setLegacyCompatibilityEnabled(false); 518 return config; 519 } 520 521 /** 522 * Create configuration with showDatatype enabled. 523 * This configuration includes datatype information for columns from CREATE TABLE statements. 524 */ 525 public static TSQLResolverConfig createWithDatatype() { 526 TSQLResolverConfig config = new TSQLResolverConfig(); 527 config.setShowDatatype(true); 528 return config; 529 } 530 531 /** 532 * Create configuration with showCTE enabled. 533 * This configuration includes CTE tables and columns in the output. 534 */ 535 public static TSQLResolverConfig createWithCTE() { 536 TSQLResolverConfig config = new TSQLResolverConfig(); 537 config.setShowCTE(true); 538 return config; 539 } 540 541 /** 542 * Create configuration for a specific database vendor. 543 * 544 * <p>This factory method creates a configuration with vendor-specific 545 * name matching rules. The VendorNameMatcher uses IdentifierService 546 * to properly handle case sensitivity and quote handling for each vendor.</p> 547 * 548 * <p>Example vendor behaviors:</p> 549 * <ul> 550 * <li>Oracle: Unquoted identifiers fold to UPPER, quoted are case-sensitive</li> 551 * <li>PostgreSQL: Unquoted identifiers fold to LOWER, quoted are case-sensitive</li> 552 * <li>MySQL: Depends on lower_case_table_names setting</li> 553 * <li>BigQuery: Table names are case-sensitive, column names are case-insensitive</li> 554 * </ul> 555 * 556 * @param vendor the database vendor 557 * @return configuration with vendor-specific name matcher 558 */ 559 public static TSQLResolverConfig createForVendor(EDbVendor vendor) { 560 TSQLResolverConfig config = new TSQLResolverConfig(); 561 config.vendor = vendor; 562 config.nameMatcher = new VendorNameMatcher(vendor); 563 return config; 564 } 565 566 /** 567 * Create configuration for a specific database vendor with datatype display enabled. 568 * 569 * @param vendor the database vendor 570 * @return configuration with vendor-specific name matcher and datatype display 571 */ 572 public static TSQLResolverConfig createForVendorWithDatatype(EDbVendor vendor) { 573 TSQLResolverConfig config = createForVendor(vendor); 574 config.setShowDatatype(true); 575 return config; 576 } 577 578 /** 579 * Get the database vendor, if set. 580 * 581 * @return the database vendor, or null if not set 582 */ 583 public EDbVendor getVendor() { 584 return vendor; 585 } 586 587 /** 588 * Set the database vendor and update name matcher accordingly. 589 * 590 * @param vendor the database vendor 591 */ 592 public void setVendor(EDbVendor vendor) { 593 this.vendor = vendor; 594 if (vendor != null) { 595 this.nameMatcher = new VendorNameMatcher(vendor); 596 } 597 } 598 599 /** 600 * Check if vendor-specific name matching is enabled. 601 * 602 * @return true if a vendor is configured 603 */ 604 public boolean hasVendor() { 605 return vendor != null; 606 } 607 608 @Override 609 public String toString() { 610 return String.format( 611 "TSQLResolverConfig{vendor=%s, nameMatcher=%s, legacyCompat=%s, minConfidence=%.2f, maxIter=%d, guessStrategy=%s, " + 612 "minDefiniteConf=%.2f, minGuessConf=%.2f, allowGuessInferred=%s, showDatatype=%s, showCTE=%s, displayMode=%s, displayPolicy=%s}", 613 vendor, 614 nameMatcher, 615 legacyCompatibilityEnabled, 616 legacySyncMinConfidence, 617 maxIterations, 618 getGuessColumnStrategyName(), 619 minDefiniteConfidence, 620 minConfidenceToGuess, 621 allowGuessWhenAllInferred, 622 showDatatype, 623 showCTE, 624 displayNameMode, 625 displayNamePolicy 626 ); 627 } 628}