package gudusoft.gsqlparser.sqlenv;

import java.util.Locale;
import java.util.Objects;

/**
 * Identifier rule four-tuple (per vendor, per object group).
 *
 * <p>Describes how a database vendor treats identifier case, separately for
 * quoted and unquoted identifiers: how the name is folded (normalized) and how
 * two names are compared.
 *
 * <p>Design source: dbobject_search.md (senior designer proposal).
 *
 * <p>Instances are immutable (all fields are {@code final} enum values), so the
 * factory methods hand out shared instances.
 *
 * <p>Usage example:
 * <pre>
 * // Oracle: unquoted folds to upper case and is insensitive, quoted is kept as-is and sensitive
 * IdentifierRules oracleRules = IdentifierRules.forOracle();
 *
 * // Couchbase: everything is case-sensitive
 * IdentifierRules couchbaseRules = IdentifierRules.forCouchbase();
 * </pre>
 *
 * @since 3.1.0.9
 */
public final class IdentifierRules {

    // ===== The four-tuple =====

    /**
     * Case-folding rule for unquoted identifiers.
     */
    public final CaseFold unquotedFold;

    /**
     * Case-comparison rule for unquoted identifiers.
     */
    public final CaseCompare unquotedCompare;

    /**
     * Case-folding rule for quoted identifiers (usually {@link CaseFold#NONE}, i.e. kept as written).
     */
    public final CaseFold quotedFold;

    /**
     * Case-comparison rule for quoted identifiers.
     */
    public final CaseCompare quotedCompare;

    // ===== Enum definitions =====

    /**
     * Case-folding rule.
     *
     * <p>Decides how the case of an identifier is normalized.
     */
    public enum CaseFold {
        /**
         * No conversion (keep as written).
         * <p>Examples: Couchbase unquoted, all quoted identifiers.
         */
        NONE,

        /**
         * Convert to upper case.
         * <p>Examples: Oracle unquoted, DB2 unquoted, Snowflake unquoted.
         */
        UPPER,

        /**
         * Convert to lower case.
         * <p>Examples: PostgreSQL unquoted, Redshift unquoted, Greenplum unquoted.
         */
        LOWER
    }

    /**
     * Case-comparison rule.
     *
     * <p>Decides how two identifiers are tested for equality.
     */
    public enum CaseCompare {
        /**
         * Case-sensitive.
         * <p>Examples: all Couchbase identifiers, all quoted identifiers.
         */
        SENSITIVE,

        /**
         * Case-insensitive.
         * <p>Examples: Oracle unquoted, MySQL unquoted.
         */
        INSENSITIVE,

        /**
         * Collation-based (decided at run time).
         * <p>Examples: SQL Server, Azure SQL.
         * <p>Comparison needs a {@link java.text.Collator}.
         */
        COLLATION_BASED,

        /**
         * Same as the unquoted rule.
         * <p>Examples: Presto quoted identifiers, Vertica quoted identifiers.
         * <p>Name resolution has to fall back to {@code unquotedCompare}.
         */
        SAME_AS_UNQUOTED
    }

    // ===== Constructor =====

    /**
     * Creates a rule set.
     *
     * @param unquotedFold    folding rule for unquoted identifiers
     * @param unquotedCompare comparison rule for unquoted identifiers
     * @param quotedFold      folding rule for quoted identifiers
     * @param quotedCompare   comparison rule for quoted identifiers
     * @throws NullPointerException if any argument is {@code null}
     */
    public IdentifierRules(CaseFold unquotedFold, CaseCompare unquotedCompare,
                           CaseFold quotedFold, CaseCompare quotedCompare) {
        this.unquotedFold = Objects.requireNonNull(unquotedFold, "unquotedFold");
        this.unquotedCompare = Objects.requireNonNull(unquotedCompare, "unquotedCompare");
        this.quotedFold = Objects.requireNonNull(quotedFold, "quotedFold");
        this.quotedCompare = Objects.requireNonNull(quotedCompare, "quotedCompare");
    }

    // ===== Cached shared instances =====
    // Every factory below returns one of these constants instead of allocating;
    // this is safe because instances are immutable.

    private static final IdentifierRules ORACLE = new IdentifierRules(
            CaseFold.UPPER, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    private static final IdentifierRules POSTGRESQL = new IdentifierRules(
            CaseFold.LOWER, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    private static final IdentifierRules COUCHBASE = new IdentifierRules(
            CaseFold.NONE, CaseCompare.SENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    private static final IdentifierRules PRESTO = new IdentifierRules(
            CaseFold.LOWER, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.SAME_AS_UNQUOTED);

    private static final IdentifierRules BIGQUERY_TABLE = new IdentifierRules(
            CaseFold.NONE, CaseCompare.SENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    private static final IdentifierRules BIGQUERY_COLUMN = new IdentifierRules(
            CaseFold.NONE, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.INSENSITIVE);

    private static final IdentifierRules MYSQL_COLUMN = new IdentifierRules(
            CaseFold.NONE, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.INSENSITIVE);

    /** MySQL table names with lower_case_table_names = 0: kept as written, case-sensitive. */
    private static final IdentifierRules MYSQL_TABLE_MODE_0 = new IdentifierRules(
            CaseFold.NONE, CaseCompare.SENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    /** MySQL table names with lower_case_table_names = 1: unquoted folds to lower case, insensitive. */
    private static final IdentifierRules MYSQL_TABLE_MODE_1 = new IdentifierRules(
            CaseFold.LOWER, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.INSENSITIVE);

    /** MySQL table names with lower_case_table_names = 2 (or any other value): kept as written, insensitive. */
    private static final IdentifierRules MYSQL_TABLE_MODE_2 = new IdentifierRules(
            CaseFold.NONE, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.INSENSITIVE);

    /** Snowflake with QUOTED_IDENTIFIERS_IGNORE_CASE = TRUE: quoted names also fold to upper case. */
    private static final IdentifierRules SNOWFLAKE_QUOTED_IGNORE_CASE = new IdentifierRules(
            CaseFold.UPPER, CaseCompare.INSENSITIVE, CaseFold.UPPER, CaseCompare.INSENSITIVE);

    /** SQL Server with a case-insensitive (or unspecified) collation: comparison is collation-based. */
    private static final IdentifierRules SQLSERVER_CASE_INSENSITIVE = new IdentifierRules(
            CaseFold.NONE, CaseCompare.COLLATION_BASED, CaseFold.NONE, CaseCompare.COLLATION_BASED);

    /** SQL Server with a case-sensitive collation: comparison is case-sensitive. */
    private static final IdentifierRules SQLSERVER_CASE_SENSITIVE = new IdentifierRules(
            CaseFold.NONE, CaseCompare.SENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    // ===== Preset rule factories =====

    /**
     * Returns the Oracle identifier rules.
     *
     * <p><strong>Database behavior (Oracle 12c+):</strong>
     * <ul>
     *   <li>Unquoted: folded to upper case, compared insensitively
     *       ({@code CREATE TABLE foo} is stored as {@code FOO}; foo = FOO = Foo)
     *   <li>Quoted: kept as written, compared sensitively
     *       ({@code CREATE TABLE "foo"} is stored as {@code foo}; "foo" != "FOO")
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>columnCollationCaseSensitive = {@code true}: <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER)
     *   <li>functionCollationCaseSensitive = {@code true}: <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER)
     *   <li>tableCollationCaseSensitive = {@code true}: <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER)
     *   <li>catalogCollationCaseSensitive = {@code false}: <strong>COMPATIBLE</strong> (both fold to UPPER)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     *   <li>The new rule is correct: Oracle does fold unquoted identifiers to upper case.</li>
     *   <li>Old tests that expect the original case to be preserved (e.g. "myTable" staying "myTable") will fail.</li>
     *   <li>Such tests should be updated to expect upper case (e.g. "myTable" becomes "MYTABLE").</li>
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     *   <li>Unquoted: fold to upper case ({@link CaseFold#UPPER}), insensitive ({@link CaseCompare#INSENSITIVE})
     *   <li>Quoted: keep as written ({@link CaseFold#NONE}), sensitive ({@link CaseCompare#SENSITIVE})
     * </ul>
     *
     * @return the shared Oracle rule set
     */
    public static IdentifierRules forOracle() {
        return ORACLE;
    }

    /**
     * Returns the Dameng identifier rules (same as Oracle).
     *
     * <p>Dameng follows Oracle identifier conventions:
     * <ul>
     *   <li>Unquoted: folded to upper case, compared insensitively
     *   <li>Quoted: kept as written, compared sensitively
     * </ul>
     *
     * @return the same instance as {@link #forOracle()}
     */
    public static IdentifierRules forDameng() {
        return forOracle();
    }

    /**
     * Returns the PostgreSQL / Redshift / Greenplum identifier rules.
     *
     * <p><strong>Database behavior (PostgreSQL 12+):</strong>
     * <ul>
     *   <li>Unquoted: folded to lower case, compared insensitively
     *       ({@code CREATE TABLE MyTable} is stored as {@code mytable}; MyTable = mytable = MYTABLE)
     *   <li>Quoted: kept as written, compared sensitively
     *       ({@code CREATE TABLE "MyTable"} is stored as {@code MyTable}; "MyTable" != "mytable")
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>columnCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     *   <li>functionCollationCaseSensitive = {@code true}: <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to LOWER)
     *   <li>tableCollationCaseSensitive = {@code true}: <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to LOWER)
     *   <li>catalogCollationCaseSensitive = {@code true}: <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to LOWER)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     *   <li>The new rule is correct: PostgreSQL does fold unquoted identifiers to lower case.</li>
     *   <li>Old tests that expect upper case (e.g. "MyTable" becoming "MYTABLE") will fail.</li>
     *   <li>Such tests should be updated to expect lower case (e.g. "MyTable" becomes "mytable").</li>
     *   <li>Old tests that expect the original case to be preserved will fail as well.</li>
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     *   <li>Unquoted: fold to lower case ({@link CaseFold#LOWER}), insensitive ({@link CaseCompare#INSENSITIVE})
     *   <li>Quoted: keep as written ({@link CaseFold#NONE}), sensitive ({@link CaseCompare#SENSITIVE})
     * </ul>
     *
     * @return the shared PostgreSQL rule set
     */
    public static IdentifierRules forPostgreSQL() {
        return POSTGRESQL;
    }

    /**
     * Returns the Couchbase N1QL identifier rules.
     *
     * <p><strong>Database behavior (Couchbase N1QL):</strong>
     * <ul>
     *   <li>Unquoted: kept as written, compared sensitively
     *   <li>Quoted: kept as written, compared sensitively
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case)
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     *   <li>Unquoted: keep as written ({@link CaseFold#NONE}), sensitive ({@link CaseCompare#SENSITIVE})
     *   <li>Quoted: keep as written ({@link CaseFold#NONE}), sensitive ({@link CaseCompare#SENSITIVE})
     * </ul>
     *
     * @return the shared Couchbase rule set
     */
    public static IdentifierRules forCouchbase() {
        return COUCHBASE;
    }

    /**
     * Returns the SQL Server / Azure SQL identifier rules for the default
     * (case-insensitive) collation.
     *
     * <p><strong>Database behavior (SQL Server 2019+):</strong>
     * <ul>
     *   <li>Unquoted: kept as written, comparison decided by the collation
     *       ({@code CREATE TABLE MyTable} is stored as {@code MyTable})
     *   <li>Quoted: kept as written, comparison decided by the collation
     *       ({@code CREATE TABLE [MyTable]} is stored as {@code MyTable})
     *   <li>Default collation (SQL_Latin1_General_CP1_CI_AS): insensitive (MyTable = mytable = MYTABLE)
     *   <li>CS collation (SQL_Latin1_General_CP1_CS_AS): sensitive (MyTable != mytable)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>columnCollationCaseSensitive = {@code false}: <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case)
     *   <li>functionCollationCaseSensitive = {@code false}: <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case)
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case)
     *   <li>catalogCollationCaseSensitive = {@code false}: <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     *   <li>The new rule is correct: SQL Server keeps the original case and compares through the collation.</li>
     *   <li>Old tests that expect "MyTable" to become "MYTABLE" will fail.</li>
     *   <li>Such tests should be updated to expect the name unchanged (e.g. "MyTable" stays "MyTable").</li>
     *   <li>This is the root cause of the processId changes seen in the dataflow tests.</li>
     *   <li>Reference: investigation_findings_2025_10_20.md</li>
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     *   <li>Unquoted: no folding ({@link CaseFold#NONE}), collation-based comparison ({@link CaseCompare#COLLATION_BASED})
     *   <li>Quoted: no folding ({@link CaseFold#NONE}), collation-based comparison ({@link CaseCompare#COLLATION_BASED})
     *   <li>Meant to be used together with {@link CollatorProvider}; the default is
     *       SQL_Latin1_General_CP1_CI_AS (case-insensitive)
     * </ul>
     *
     * <p><strong>Note:</strong> SQL Server case behavior is decided entirely by the collation;
     * it cannot be expressed as a simple fold.
     *
     * @return the same instance as {@code forSQLServer(null)}
     */
    public static IdentifierRules forSQLServer() {
        return forSQLServer(null);
    }

    /**
     * Returns the SQL Server / Azure SQL identifier rules for a given collation.
     *
     * <p>SQL Server case behavior is decided entirely by the collation:
     * <ul>
     *   <li>CI collation (e.g. SQL_Latin1_General_CP1_CI_AS): case-insensitive
     *   <li>CS collation (e.g. SQL_Latin1_General_CP1_CS_AS): case-sensitive
     * </ul>
     *
     * <p>Both unquoted and quoted identifiers keep their original case
     * ({@link CaseFold#NONE}). A collation recognized as case-sensitive yields
     * {@link CaseCompare#SENSITIVE} for both; anything else yields
     * {@link CaseCompare#COLLATION_BASED} for both.
     *
     * @param collation SQL Server collation name (e.g. "SQL_Latin1_General_CP1_CS_AS");
     *                  {@code null} or the empty string selects the default (case-insensitive)
     * @return a shared rule set; one of two instances depending on the collation
     */
    public static IdentifierRules forSQLServer(String collation) {
        return isCaseSensitiveCollation(collation)
                ? SQLSERVER_CASE_SENSITIVE
                : SQLSERVER_CASE_INSENSITIVE;
    }

    /**
     * Tells whether a SQL Server collation name denotes a case-sensitive collation.
     *
     * <p>SQL Server naming convention: {@code _CS} in the name means case-sensitive,
     * {@code _CI} means case-insensitive. The name is matched case-insensitively.
     *
     * @param collation collation name, may be {@code null}
     * @return {@code true} if the name contains {@code _CS_} or ends with {@code _CS};
     *         {@code false} otherwise, including for {@code null} and the empty string
     */
    private static boolean isCaseSensitiveCollation(String collation) {
        if (collation == null || collation.isEmpty()) {
            return false; // default is case-insensitive
        }
        // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
        String upper = collation.toUpperCase(Locale.ROOT);
        // NOTE(review): binary collations (_BIN / _BIN2) carry no _CS marker and are
        // therefore reported as not case-sensitive here - confirm that is intended.
        return upper.contains("_CS_") || upper.endsWith("_CS");
    }

    /**
     * Returns the MySQL identifier rules for table / schema names.
     *
     * <p><strong>Database behavior (MySQL 8.0+):</strong>
     * <p>Decided by the {@code lower_case_table_names} system variable:
     * <ul>
     *   <li>0 (Unix/Linux): case-sensitive, kept as written
     *       ({@code CREATE TABLE MyTable} is stored as {@code MyTable}; MyTable != mytable)
     *   <li>1 (Windows): stored in lower case, compared insensitively
     *       ({@code CREATE TABLE MyTable} is stored as {@code mytable}; MyTable = mytable = MYTABLE)
     *   <li>2 (macOS): stored as written, compared insensitively
     *       ({@code CREATE TABLE MyTable} is stored as {@code MyTable}; MyTable = mytable = MYTABLE)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>columnCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new doesn't fold or folds to LOWER)
     *   <li>functionCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new doesn't fold)
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new behavior depends on lower_case_table_names)
     *   <li>catalogCollationCaseSensitive = {@code true}: <strong>PARTIAL</strong> (legacy preserved case, new is insensitive)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     *   <li>The new rule is correct: MySQL behavior does depend on lower_case_table_names.</li>
     *   <li>Old tests that expect "MyTable" to become "MYTABLE" will fail in modes 1 and 2.</li>
     *   <li>Mode 0: expect the name unchanged (e.g. "MyTable" stays "MyTable"), case-sensitive.</li>
     *   <li>Mode 1: expect lower case (e.g. "MyTable" becomes "mytable"), case-insensitive.</li>
     *   <li>Mode 2: expect the name unchanged (e.g. "MyTable" stays "MyTable"), case-insensitive.</li>
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     *   <li>Mode 0: no folding ({@link CaseFold#NONE}), sensitive ({@link CaseCompare#SENSITIVE})
     *   <li>Mode 1: fold to lower case ({@link CaseFold#LOWER}), insensitive ({@link CaseCompare#INSENSITIVE})
     *   <li>Mode 2: no folding ({@link CaseFold#NONE}), insensitive ({@link CaseCompare#INSENSITIVE})
     *   <li>Quoted (modes 1 and 2): kept as written but also insensitive (MySQL-specific behavior)
     * </ul>
     *
     * @param lowerCaseTableNames value of {@code lower_case_table_names} (0, 1 or 2);
     *                            any value other than 0 or 1 is treated like 2
     * @return a shared rule set for the given mode
     */
    public static IdentifierRules forMySQL(int lowerCaseTableNames) {
        if (lowerCaseTableNames == 0) {
            // Unix/Linux: case-sensitive
            return MYSQL_TABLE_MODE_0;
        }
        if (lowerCaseTableNames == 1) {
            // Windows: unquoted names fold to lower case, comparison is insensitive
            return MYSQL_TABLE_MODE_1;
        }
        // macOS, and every other value: no folding, comparison is insensitive.
        // NOTE(review): values outside 0..2 are accepted silently - confirm callers rely on that.
        return MYSQL_TABLE_MODE_2;
    }

    /**
     * Returns the MySQL column-name rules (always case-insensitive).
     *
     * <p><strong>Database behavior (MySQL 8.0+):</strong>
     * <ul>
     *   <li>Column names are always case-insensitive, regardless of lower_case_table_names
     *   <li>Stored as written, compared insensitively (MyColumn = mycolumn)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>columnCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new preserves case but is INSENSITIVE)
     * </ul>
     *
     * @return the shared MySQL column rule set
     */
    public static IdentifierRules forMySQLColumn() {
        return MYSQL_COLUMN;
    }

    /**
     * Returns the MySQL routine-name rules (always case-insensitive).
     *
     * <p><strong>Database behavior (MySQL 8.0+):</strong>
     * <ul>
     *   <li>Function and stored-procedure names are always case-insensitive
     *   <li>Stored as written, compared insensitively (same as column names)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>functionCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new preserves case but is INSENSITIVE)
     * </ul>
     *
     * @return the same instance as {@link #forMySQLColumn()}
     */
    public static IdentifierRules forMySQLRoutine() {
        return forMySQLColumn();
    }

    /**
     * Returns the BigQuery table-name rules (case-sensitive).
     *
     * <p><strong>Database behavior (BigQuery Standard SQL):</strong>
     * <ul>
     *   <li>Unquoted: kept as written, compared sensitively
     *       ({@code CREATE TABLE MyTable} is stored as {@code MyTable}; MyTable != mytable)
     *   <li>Quoted: kept as written, compared sensitively
     *       ({@code CREATE TABLE `MyTable`} is stored as {@code MyTable}; MyTable != mytable)
     *   <li>Table, dataset and project names are all case-sensitive.</li>
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case and is SENSITIVE)
     *   <li>catalogCollationCaseSensitive = {@code false}: <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case and is SENSITIVE)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     *   <li>The new rule is correct: BigQuery table names are case-sensitive.</li>
     *   <li>Old tests that expect "MyTable" to become "MYTABLE" will fail.</li>
     *   <li>Such tests should be updated to expect the name unchanged (e.g. "MyTable" stays "MyTable").</li>
     *   <li>Old code may have wrongly matched table names that differ only in case; the new code rejects them.</li>
     * </ul>
     *
     * @return the shared BigQuery table rule set
     */
    public static IdentifierRules forBigQueryTable() {
        return BIGQUERY_TABLE;
    }

    /**
     * Returns the BigQuery column-name rules (case-insensitive).
     *
     * <p><strong>Database behavior (BigQuery Standard SQL):</strong>
     * <ul>
     *   <li>Unquoted: kept as written, compared insensitively (MyColumn = mycolumn = MYCOLUMN)
     *   <li>Quoted: kept as written, compared insensitively ({@code `MyColumn`}: MyColumn = mycolumn)
     *   <li>Column names are case-insensitive (unlike table names).</li>
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>columnCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new preserves case but is INSENSITIVE)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     *   <li>The new rule is correct: BigQuery column names are case-insensitive.</li>
     *   <li>Old tests that expect "MyColumn" to become "MYCOLUMN" will fail.</li>
     *   <li>Such tests should be updated to expect the name unchanged (e.g. "MyColumn" stays "MyColumn").</li>
     *   <li>Comparison must still ignore case (MyColumn = mycolumn = MYCOLUMN).</li>
     * </ul>
     *
     * @return the shared BigQuery column rule set
     */
    public static IdentifierRules forBigQueryColumn() {
        return BIGQUERY_COLUMN;
    }

    /**
     * Returns the DB2 / Netezza / Exasol identifier rules (same as Oracle).
     *
     * <p><strong>Database behavior (DB2 11+):</strong>
     * <ul>
     *   <li>Unquoted: folded to upper case, compared insensitively (same as Oracle)
     *   <li>Quoted: kept as written, compared sensitively
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>DB2 tableCollationCaseSensitive = {@code true}: <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER)
     *   <li>DB2 catalogCollationCaseSensitive = {@code true}: <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER)
     *   <li>See {@link #forOracle()} for details
     * </ul>
     *
     * @return the same instance as {@link #forOracle()}
     */
    public static IdentifierRules forDB2() {
        return forOracle();
    }

    /**
     * Returns the default Snowflake identifier rules (same as Oracle).
     *
     * <p><strong>Database behavior (Snowflake):</strong>
     * <ul>
     *   <li>Unquoted: folded to upper case, compared insensitively (same as Oracle)
     *   <li>Quoted: kept as written, compared sensitively
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (both fold to UPPER, COMPATIBLE)
     *   <li>columnCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (both fold to UPPER, COMPATIBLE)
     *   <li>NOTE(review): the two entries above are labeled PARTIAL but described as COMPATIBLE - confirm which is meant
     *   <li>See {@link #forOracle()} for details
     * </ul>
     *
     * @return the same instance as {@code forSnowflake(false)}
     */
    public static IdentifierRules forSnowflake() {
        return forSnowflake(false);
    }

    /**
     * Returns the Snowflake identifier rules for a given
     * QUOTED_IDENTIFIERS_IGNORE_CASE setting.
     *
     * <p>Default Snowflake behavior:
     * <ul>
     *   <li>Unquoted: folded to upper case, compared insensitively (same as Oracle)
     *   <li>Quoted: kept as written, compared sensitively
     * </ul>
     *
     * <p>With QUOTED_IDENTIFIERS_IGNORE_CASE = TRUE:
     * <ul>
     *   <li>Quoted identifiers are folded to upper case and compared insensitively as well
     * </ul>
     *
     * @param quotedIdentifiersIgnoreCase the Snowflake QUOTED_IDENTIFIERS_IGNORE_CASE parameter
     * @return a shared rule set; the Oracle rules when the parameter is {@code false}
     */
    public static IdentifierRules forSnowflake(boolean quotedIdentifiersIgnoreCase) {
        if (quotedIdentifiersIgnoreCase) {
            // Quoted identifiers also fold to upper case and are case-insensitive
            return SNOWFLAKE_QUOTED_IGNORE_CASE;
        }
        return forOracle(); // default: quoted is kept as written and is sensitive
    }

    /**
     * Returns the SAP HANA identifier rules (same as Oracle).
     *
     * <p><strong>Database behavior (SAP HANA):</strong>
     * <ul>
     *   <li>Unquoted: folded to upper case, compared insensitively (same as Oracle)
     *   <li>Quoted: kept as written, compared sensitively
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (both fold to UPPER, COMPATIBLE)
     *   <li>See {@link #forOracle()} for details
     * </ul>
     *
     * @return the same instance as {@link #forOracle()}
     */
    public static IdentifierRules forHANA() {
        return forOracle();
    }

    /**
     * Returns the Presto / Trino identifier rules.
     *
     * <p><strong>Database behavior (Presto/Trino):</strong>
     * <ul>
     *   <li>Unquoted: folded to lower case, compared insensitively
     *       ({@code CREATE TABLE MyTable} is stored as {@code mytable})
     *   <li>Quoted: kept as written, but compared like unquoted names (still insensitive)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     *   <li>columnCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     *   <li>The new rule is correct: Presto/Trino folds to lower case and quoted names follow the unquoted rule.</li>
     *   <li>Old tests that expect "MyTable" to become "MYTABLE" will fail.</li>
     *   <li>Such tests should be updated to expect lower case (e.g. "MyTable" becomes "mytable").</li>
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     *   <li>Unquoted: fold to lower case ({@link CaseFold#LOWER}), insensitive ({@link CaseCompare#INSENSITIVE})
     *   <li>Quoted: keep as written ({@link CaseFold#NONE}), same as unquoted ({@link CaseCompare#SAME_AS_UNQUOTED})
     * </ul>
     *
     * @return the shared Presto rule set
     */
    public static IdentifierRules forPresto() {
        return PRESTO;
    }

    /**
     * Returns the Vertica identifier rules (same as Presto).
     *
     * <p><strong>Database behavior (Vertica):</strong>
     * <ul>
     *   <li>Unquoted: folded to lower case, compared insensitively (same as Presto)
     *   <li>Quoted: kept as written, but compared like unquoted names
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     *   <li>See {@link #forPresto()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPresto()}
     */
    public static IdentifierRules forVertica() {
        return forPresto();
    }

    /**
     * Returns the Hive / SparkSQL / Impala identifier rules (same as PostgreSQL).
     *
     * <p><strong>Database behavior (Hive 3+, SparkSQL 3+):</strong>
     * <ul>
     *   <li>Unquoted: folded to lower case, compared insensitively (same as PostgreSQL)
     *   <li>Quoted: kept as written, compared sensitively
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     *   <li>See {@link #forPostgreSQL()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPostgreSQL()}
     */
    public static IdentifierRules forHive() {
        return forPostgreSQL();
    }

    /**
     * Returns the Teradata identifier rules (same as PostgreSQL).
     *
     * <p><strong>Database behavior as modeled here (Teradata 16+):</strong>
     * <ul>
     *   <li>Unquoted: folded to lower case, compared insensitively (same as PostgreSQL)
     *   <li>Quoted: kept as written, compared sensitively
     *   <li>NOTE(review): confirm against Teradata documentation that unquoted names
     *       really fold to lower case and quoted names are case-sensitive
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     *   <li>See {@link #forPostgreSQL()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPostgreSQL()}
     */
    public static IdentifierRules forTeradata() {
        return forPostgreSQL();
    }

    /**
     * Returns the Athena identifier rules (same as Presto).
     *
     * <p><strong>Database behavior (AWS Athena):</strong>
     * <ul>
     *   <li>Unquoted: folded to lower case, compared insensitively (same as Presto; Athena is based on Trino/Presto)
     *   <li>Quoted: kept as written, but compared like unquoted names
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     *   <li>See {@link #forPresto()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPresto()}
     */
    public static IdentifierRules forAthena() {
        return forPresto();
    }

    /**
     * Returns the GaussDB identifier rules (same as PostgreSQL).
     *
     * <p><strong>Database behavior (Huawei GaussDB):</strong>
     * <ul>
     *   <li>Unquoted: folded to lower case, compared insensitively (same as PostgreSQL; GaussDB is based on PostgreSQL)
     *   <li>Quoted: kept as written, compared sensitively
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     *   <li>See {@link #forPostgreSQL()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPostgreSQL()}
     */
    public static IdentifierRules forGaussDB() {
        return forPostgreSQL();
    }

    /**
     * Returns the Databricks identifier rules (same as Hive).
     *
     * <p><strong>Database behavior (Databricks SQL):</strong>
     * <ul>
     *   <li>Unquoted: folded to lower case, compared insensitively (same as Hive/SparkSQL)
     *   <li>Quoted: kept as written, compared sensitively
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     *   <li>See {@link #forHive()} and {@link #forPostgreSQL()} for details
     * </ul>
     *
     * @return the same instance as {@link #forHive()}
     */
    public static IdentifierRules forDatabricks() {
        return forHive();
    }

    /**
     * Returns the Apache Flink SQL identifier rules (same as Hive/SparkSQL).
     *
     * <p><strong>Behavior:</strong>
     * <ul>
     *   <li>Flink SQL is based on Apache Calcite; identifier handling is similar to Hive/SparkSQL
     *   <li>Unquoted identifiers: folded to lower case, compared ignoring case
     *   <li>Quoted identifiers (backticks): original case kept, compared sensitively
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong>
     * </ul>
     *
     * @return the same instance as {@link #forHive()}
     */
    public static IdentifierRules forFlink() {
        return forHive();
    }

    /**
     * Returns the Apache Doris identifier rules (same as MySQL) for a given
     * {@code lower_case_table_names} mode.
     *
     * <p><strong>Behavior:</strong>
     * <ul>
     *   <li>Doris is a MySQL-compatible OLAP database
     *   <li>Identifier handling follows the MySQL rules
     *   <li>Case sensitivity of table / database names depends on the operating system
     *       (similar to MySQL lower_case_table_names)
     *   <li>Column names are always case-insensitive
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>tableCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong>
     *   <li>catalogCollationCaseSensitive = {@code true}: <strong>COMPATIBLE</strong>
     * </ul>
     *
     * @param lowerCaseTableNames value of {@code lower_case_table_names} (0, 1 or 2);
     *                            the no-argument overload {@link #forDoris()} uses 1
     * @return the same instance as {@link #forMySQL(int)} for that value
     */
    public static IdentifierRules forDoris(int lowerCaseTableNames) {
        return forMySQL(lowerCaseTableNames);
    }

    /**
     * Returns the default Apache Doris identifier rules
     * (equivalent to lower_case_table_names = 1).
     *
     * <p><strong>Behavior:</strong>
     * <ul>
     *   <li>Doris is a MySQL-compatible OLAP database
     *   <li>By default table names fold to lower case and are compared insensitively
     * </ul>
     *
     * @return the same instance as {@code forMySQL(1)}
     */
    public static IdentifierRules forDoris() {
        return forMySQL(1); // Default to lowercase folding, case-insensitive
    }

    /**
     * Returns the Apache Doris column-name rules (same as MySQL column names,
     * always case-insensitive).
     *
     * @return the same instance as {@link #forMySQLColumn()}
     */
    public static IdentifierRules forDorisColumn() {
        return forMySQLColumn();
    }

    /**
     * Returns the StarRocks identifier rules for a given mode (the parameter is
     * the equivalent of MySQL {@code lower_case_table_names}).
     *
     * <p><strong>Behavior:</strong>
     * <ul>
     *   <li>StarRocks is a fork of Doris, a MySQL-compatible OLAP database
     *   <li>It uses the same identifier rules as Doris and MySQL
     * </ul>
     *
     * @param lowerCaseTableNames value of {@code lower_case_table_names} (0, 1 or 2);
     *                            the no-argument overload {@link #forStarrocks()} uses 1
     * @return the same instance as {@link #forMySQL(int)} for that value
     */
    public static IdentifierRules forStarrocks(int lowerCaseTableNames) {
        return forMySQL(lowerCaseTableNames);
    }

    /**
     * Returns the default StarRocks identifier rules
     * (equivalent to lower_case_table_names = 1).
     *
     * <p><strong>Behavior:</strong>
     * <ul>
     *   <li>StarRocks is a fork of Doris, a MySQL-compatible OLAP database
     *   <li>By default table names fold to lower case and are compared insensitively
     * </ul>
     *
     * @return the same instance as {@code forMySQL(1)}
     */
    public static IdentifierRules forStarrocks() {
        return forMySQL(1); // Default to lowercase folding, case-insensitive
    }

    /**
     * Returns the StarRocks column-name rules (same as MySQL column names,
     * always case-insensitive).
     *
     * @return the same instance as {@link #forMySQLColumn()}
     */
    public static IdentifierRules forStarrocksColumn() {
        return forMySQLColumn();
    }

    /**
     * Returns the SQLite identifier rules (same as PostgreSQL).
     *
     * <p><strong>Database behavior as modeled here (SQLite 3.x):</strong>
     * <ul>
     *   <li>Unquoted: folded to lower case, compared insensitively
     *       (same as PostgreSQL; SQLite follows "What would PostgreSQL do?")
     *   <li>Quoted: kept as written, compared sensitively
     *   <li>NOTE(review): confirm against SQLite documentation that quoted names are
     *       case-sensitive and unquoted names are folded rather than kept as written
     * </ul>
     *
     * @return the same instance as {@link #forPostgreSQL()}
     */
    public static IdentifierRules forSqlite() {
        return forPostgreSQL();
    }

    /**
     * Returns the generic rules (default: same as PostgreSQL).
     *
     * <p><strong>Notes:</strong>
     * <ul>
     *   <li>Used when the database type is unknown or not in the supported list
     *   <li>Defaults to PostgreSQL behavior (fold to lower case, compare insensitively)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     *   <li>defaultCollationCaseSensitive = {@code false}: <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     *   <li>See {@link #forPostgreSQL()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPostgreSQL()}
     */
    public static IdentifierRules forGeneric() {
        return forPostgreSQL();
    }

    // ===== Value semantics =====

    /**
     * Two rule sets are equal when all four components are the same enum constants.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        IdentifierRules that = (IdentifierRules) o;
        return unquotedFold == that.unquotedFold
                && unquotedCompare == that.unquotedCompare
                && quotedFold == that.quotedFold
                && quotedCompare == that.quotedCompare;
    }

    /**
     * Hash code derived from the same four components used by {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return Objects.hash(unquotedFold, unquotedCompare, quotedFold, quotedCompare);
    }

    // ===== toString (for debugging) =====

    /**
     * Renders the four-tuple as {@code IdentifierRules{unquoted=FOLD/COMPARE, quoted=FOLD/COMPARE}}.
     */
    @Override
    public String toString() {
        return String.format("IdentifierRules{unquoted=%s/%s, quoted=%s/%s}",
                unquotedFold, unquotedCompare, quotedFold, quotedCompare);
    }
}