001package gudusoft.gsqlparser.sqlenv; 002 003/** 004 * 标识符规则四元组(per vendor, per object group) 005 * 006 * <p>定义数据库厂商的标识符大小写规则,区分 quoted 和 unquoted 标识符的处理方式。 007 * 008 * <p>设计来源:dbobject_search.md 资深设计师方案 009 * 010 * <p>使用示例: 011 * <pre> 012 * // Oracle: unquoted 折叠为大写且不敏感,quoted 保留原样且敏感 013 * IdentifierRules oracleRules = IdentifierRules.forOracle(); 014 * 015 * // Couchbase: 全部大小写敏感 016 * IdentifierRules couchbaseRules = IdentifierRules.forCouchbase(); 017 * </pre> 018 * 019 * @since 3.1.0.9 020 */ 021public final class IdentifierRules { 022 023 // ===== 四元组定义 ===== 024 025 /** 026 * Unquoted 标识符的大小写折叠规则 027 */ 028 public final CaseFold unquotedFold; 029 030 /** 031 * Unquoted 标识符的大小写比较规则 032 */ 033 public final CaseCompare unquotedCompare; 034 035 /** 036 * Quoted 标识符的大小写折叠规则(通常为 NONE,保留原样) 037 */ 038 public final CaseFold quotedFold; 039 040 /** 041 * Quoted 标识符的大小写比较规则 042 */ 043 public final CaseCompare quotedCompare; 044 045 // ===== 枚举定义 ===== 046 047 /** 048 * 大小写折叠规则(Case Folding) 049 * 050 * <p>决定如何规范化标识符的大小写 051 */ 052 public enum CaseFold { 053 /** 054 * 不转换(保留原样) 055 * <p>例如:Couchbase unquoted, 所有 quoted identifiers 056 */ 057 NONE, 058 059 /** 060 * 转大写 061 * <p>例如:Oracle unquoted, DB2 unquoted, Snowflake unquoted 062 */ 063 UPPER, 064 065 /** 066 * 转小写 067 * <p>例如:PostgreSQL unquoted, Redshift unquoted, Greenplum unquoted 068 */ 069 LOWER 070 } 071 072 /** 073 * 大小写比较规则(Case Comparison) 074 * 075 * <p>决定如何比较两个标识符是否相等 076 */ 077 public enum CaseCompare { 078 /** 079 * 大小写敏感 080 * <p>例如:Couchbase 所有标识符, 所有 quoted identifiers 081 */ 082 SENSITIVE, 083 084 /** 085 * 大小写不敏感 086 * <p>例如:Oracle unquoted, MySQL unquoted 087 */ 088 INSENSITIVE, 089 090 /** 091 * 基于 collation(运行时决定) 092 * <p>例如:SQL Server, Azure SQL 093 * <p>需要使用 {@link java.text.Collator} 进行比较 094 */ 095 COLLATION_BASED, 096 097 /** 098 * 与 unquoted 规则一致 099 * <p>例如:Presto quoted identifiers, Vertica quoted identifiers 100 * <p>在解析时需要回退到 unquotedCompare 101 */ 102 SAME_AS_UNQUOTED 103 } 104 105 // ===== 构造函数 ===== 106 107 /** 108 * 构造标识符规则 109 * 110 * @param unquotedFold unquoted 标识符的折叠规则 111 * @param unquotedCompare unquoted 标识符的比较规则 112 * @param quotedFold quoted 标识符的折叠规则 113 * @param quotedCompare quoted 标识符的比较规则 114 */ 115 public IdentifierRules(CaseFold unquotedFold, CaseCompare unquotedCompare, 116 CaseFold quotedFold, CaseCompare quotedCompare) { 117 this.unquotedFold = unquotedFold; 118 this.unquotedCompare = unquotedCompare; 119 this.quotedFold = quotedFold; 120 this.quotedCompare = quotedCompare; 121 } 122 123 // ===== 预设规则工厂方法 ===== 124 125 /** 126 * Oracle 标识符规则 127 * 128 * <p><strong>实际数据库行为(Oracle 12c+):</strong> 129 * <ul> 130 * <li>Unquoted: 折叠为大写,比较不敏感 (CREATE TABLE foo → stored as FOO, foo=FOO=Foo) 131 * <li>Quoted: 保留原样,比较敏感 (CREATE TABLE "foo" → stored as foo, "foo"!="FOO") 132 * </ul> 133 * 134 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 135 * <ul> 136 * <li>columnCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER) 137 * <li>functionCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER) 138 * <li>tableCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER) 139 * <li>catalogCollationCaseSensitive = {@code false} → ✅ <strong>COMPATIBLE</strong> (both fold to UPPER) 140 * </ul> 141 * 142 * <p><strong>测试用例影响:</strong> 143 * <ul> 144 * <li>✅ 新规则正确:Oracle 确实将 unquoted identifiers 折叠为大写</li> 145 * <li>⚠️ 如果旧测试期望保留原始大小写 (如 "myTable" 保持为 "myTable"),则测试会失败</li> 146 * <li>⚠️ 应更新测试期望为大写 (如 "myTable" → "MYTABLE")</li> 147 * </ul> 148 * 149 * <p><strong>IdentifierRules 配置:</strong> 150 * <ul> 151 * <li>Unquoted: 折叠为大写 ({@link CaseFold#UPPER}), 比较不敏感 ({@link CaseCompare#INSENSITIVE}) 152 * <li>Quoted: 保留原样 ({@link CaseFold#NONE}), 比较敏感 ({@link CaseCompare#SENSITIVE}) 153 * </ul> 154 */ 155 public static IdentifierRules forOracle() { 156 return new IdentifierRules( 157 CaseFold.UPPER, // unquoted 折叠为大写 158 CaseCompare.INSENSITIVE, // unquoted 比较不敏感 159 CaseFold.NONE, // quoted 保留原样 160 CaseCompare.SENSITIVE // quoted 比较敏感 161 ); 162 } 163 164 /** 165 * PostgreSQL / Redshift / Greenplum 标识符规则 166 * 167 * <p><strong>实际数据库行为(PostgreSQL 12+):</strong> 168 * <ul> 169 * <li>Unquoted: 折叠为小写,比较不敏感 (CREATE TABLE MyTable → stored as mytable, MyTable=mytable=MYTABLE) 170 * <li>Quoted: 保留原样,比较敏感 (CREATE TABLE "MyTable" → stored as MyTable, "MyTable"!="mytable") 171 * </ul> 172 * 173 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 174 * <ul> 175 * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER) 176 * <li>functionCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to LOWER) 177 * <li>tableCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to LOWER) 178 * <li>catalogCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to LOWER) 179 * </ul> 180 * 181 * <p><strong>测试用例影响:</strong> 182 * <ul> 183 * <li>✅ 新规则正确:PostgreSQL 确实将 unquoted identifiers 折叠为小写</li> 184 * <li>⚠️ 如果旧测试期望大写 (如 "MyTable" → "MYTABLE"),则测试会失败</li> 185 * <li>⚠️ 应更新测试期望为小写 (如 "MyTable" → "mytable")</li> 186 * <li>⚠️ 如果旧测试期望保留原始大小写,也会失败</li> 187 * </ul> 188 * 189 * <p><strong>IdentifierRules 配置:</strong> 190 * <ul> 191 * <li>Unquoted: 折叠为小写 ({@link CaseFold#LOWER}), 比较不敏感 ({@link CaseCompare#INSENSITIVE}) 192 * <li>Quoted: 保留原样 ({@link CaseFold#NONE}), 比较敏感 ({@link CaseCompare#SENSITIVE}) 193 * </ul> 194 */ 195 public static IdentifierRules forPostgreSQL() { 196 return new IdentifierRules( 197 CaseFold.LOWER, // unquoted 折叠为小写 198 CaseCompare.INSENSITIVE, // unquoted 比较不敏感 199 CaseFold.NONE, // quoted 保留原样 200 CaseCompare.SENSITIVE // quoted 比较敏感 201 ); 202 } 203 204 /** 205 * Couchbase N1QL 标识符规则 206 * 207 * <p><strong>实际数据库行为(Couchbase N1QL):</strong> 208 * <ul> 209 * <li>Unquoted: 保留原样,比较敏感 210 * <li>Quoted: 保留原样,比较敏感 211 * </ul> 212 * 213 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 214 * <ul> 215 * <li>tableCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case) 216 * </ul> 217 * 218 * <p><strong>IdentifierRules 配置:</strong> 219 * <ul> 220 * <li>Unquoted: 保留原样 ({@link CaseFold#NONE}), 比较敏感 ({@link CaseCompare#SENSITIVE}) 221 * <li>Quoted: 保留原样 ({@link CaseFold#NONE}), 比较敏感 ({@link CaseCompare#SENSITIVE}) 222 * </ul> 223 */ 224 public static IdentifierRules forCouchbase() { 225 return new IdentifierRules( 226 CaseFold.NONE, // unquoted 保留原样 227 CaseCompare.SENSITIVE, // unquoted 比较敏感 228 CaseFold.NONE, // quoted 保留原样 229 CaseCompare.SENSITIVE // quoted 比较敏感 230 ); 231 } 232 233 /** 234 * SQL Server / Azure SQL 标识符规则 235 * 236 * <p><strong>实际数据库行为(SQL Server 2019+):</strong> 237 * <ul> 238 * <li>Unquoted: 保留原样,比较由 collation 决定 (CREATE TABLE MyTable → stored as MyTable) 239 * <li>Quoted: 保留原样,比较由 collation 决定 (CREATE TABLE [MyTable] → stored as MyTable) 240 * <li>默认 collation (SQL_Latin1_General_CP1_CI_AS): 大小写不敏感 (MyTable=mytable=MYTABLE) 241 * <li>CS collation (SQL_Latin1_General_CP1_CS_AS): 大小写敏感 (MyTable!=mytable) 242 * </ul> 243 * 244 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 245 * <ul> 246 * <li>columnCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case) 247 * <li>functionCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case) 248 * <li>tableCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case) 249 * <li>catalogCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case) 250 * </ul> 251 * 252 * <p><strong>测试用例影响:</strong> 253 * <ul> 254 * <li>✅ 新规则正确:SQL Server 保留标识符原始大小写,使用 collation 进行比较</li> 255 * <li>⚠️ 如果旧测试期望 "MyTable" → "MYTABLE",则测试会失败</li> 256 * <li>⚠️ 应更新测试期望为保留原样 (如 "MyTable" 保持为 "MyTable")</li> 257 * <li>⚠️ 这是导致 dataflow 测试 processId 变化的根本原因!</li> 258 * <li>📝 参考: investigation_findings_2025_10_20.md</li> 259 * </ul> 260 * 261 * <p><strong>IdentifierRules 配置:</strong> 262 * <ul> 263 * <li>Unquoted: 不折叠 ({@link CaseFold#NONE}), 基于 collation 比较 ({@link CaseCompare#COLLATION_BASED}) 264 * <li>Quoted: 不折叠 ({@link CaseFold#NONE}), 基于 collation 比较 ({@link CaseCompare#COLLATION_BASED}) 265 * <li>需要配合 {@link CollatorProvider} 使用,默认使用 SQL_Latin1_General_CP1_CI_AS (大小写不敏感) 266 * </ul> 267 * 268 * <p><strong>注意:</strong>SQL Server 的大小写行为完全由 collation 决定,无法简单折叠。 269 */ 270 public static IdentifierRules forSQLServer() { 271 return new IdentifierRules( 272 CaseFold.UPPER, // unquoted 不折叠(保留原样) 273 CaseCompare.COLLATION_BASED, // unquoted 基于 collation 比较 274 CaseFold.NONE, // quoted 不折叠(保留原样) 275 CaseCompare.COLLATION_BASED // quoted 基于 collation 比较 276 ); 277 } 278 279 /** 280 * MySQL 标识符规则(table/schema names) 281 * 282 * <p><strong>实际数据库行为(MySQL 8.0+):</strong> 283 * <p>根据 {@code lower_case_table_names} 系统变量决定: 284 * <ul> 285 * <li>0 (Unix/Linux): 大小写敏感,保留原样 (CREATE TABLE MyTable → stored as MyTable, MyTable!=mytable) 286 * <li>1 (Windows): 存储为小写,比较不敏感 (CREATE TABLE MyTable → stored as mytable, MyTable=mytable=MYTABLE) 287 * <li>2 (macOS): 存储保留原样,比较不敏感 (CREATE TABLE MyTable → stored as MyTable, MyTable=mytable=MYTABLE) 288 * </ul> 289 * 290 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 291 * <ul> 292 * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new doesn't fold or folds to LOWER) 293 * <li>functionCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new doesn't fold) 294 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new behavior depends on lower_case_table_names) 295 * <li>catalogCollationCaseSensitive = {@code true} → ⚠️ <strong>PARTIAL</strong> (legacy preserved case, new is insensitive) 296 * </ul> 297 * 298 * <p><strong>测试用例影响:</strong> 299 * <ul> 300 * <li>✅ 新规则正确:MySQL 的行为确实依赖 lower_case_table_names 设置</li> 301 * <li>⚠️ 如果旧测试期望 "MyTable" → "MYTABLE",则在模式 1/2 下测试会失败</li> 302 * <li>⚠️ 模式 0: 应期望保留原样 (如 "MyTable" 保持为 "MyTable"),区分大小写</li> 303 * <li>⚠️ 模式 1: 应期望小写 (如 "MyTable" → "mytable"),不区分大小写</li> 304 * <li>⚠️ 模式 2: 应期望保留原样 (如 "MyTable" 保持为 "MyTable"),不区分大小写</li> 305 * </ul> 306 * 307 * <p><strong>IdentifierRules 配置:</strong> 308 * <ul> 309 * <li>模式 0: 不折叠 ({@link CaseFold#NONE}), 敏感 ({@link CaseCompare#SENSITIVE}) 310 * <li>模式 1: 折叠为小写 ({@link CaseFold#LOWER}), 不敏感 ({@link CaseCompare#INSENSITIVE}) 311 * <li>模式 2: 不折叠 ({@link CaseFold#NONE}), 不敏感 ({@link CaseCompare#INSENSITIVE}) 312 * <li>Quoted: 保留原样,但也不敏感 (MySQL 特殊行为) 313 * </ul> 314 * 315 * @param lowerCaseTableNames {@code lower_case_table_names} 值(0, 1, 2) 316 */ 317 public static IdentifierRules forMySQL(int lowerCaseTableNames) { 318 if (lowerCaseTableNames == 0) { 319 // Unix/Linux: 大小写敏感 320 return new IdentifierRules( 321 CaseFold.NONE, 322 CaseCompare.SENSITIVE, 323 CaseFold.NONE, 324 CaseCompare.SENSITIVE 325 ); 326 } else { 327 // Windows/macOS: 比较不敏感 328 CaseFold fold = (lowerCaseTableNames == 1) ? CaseFold.LOWER : CaseFold.NONE; 329 return new IdentifierRules( 330 fold, // 根据设置决定是否折叠 331 CaseCompare.INSENSITIVE, // 比较不敏感 332 CaseFold.NONE, // quoted 保留原样 333 CaseCompare.INSENSITIVE // quoted 也不敏感(MySQL 特性) 334 ); 335 } 336 } 337 338 /** 339 * MySQL 列名规则(始终大小写不敏感) 340 * 341 * <p><strong>实际数据库行为(MySQL 8.0+):</strong> 342 * <ul> 343 * <li>列名始终大小写不敏感,不受 lower_case_table_names 影响 344 * <li>存储时保留原样,比较时不敏感 (SELECT MyColumn → stored as MyColumn, MyColumn=mycolumn) 345 * </ul> 346 * 347 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 348 * <ul> 349 * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new preserves case but is INSENSITIVE) 350 * </ul> 351 */ 352 public static IdentifierRules forMySQLColumn() { 353 return new IdentifierRules( 354 CaseFold.NONE, // 不折叠(但比较时不敏感) 355 CaseCompare.INSENSITIVE, 356 CaseFold.NONE, 357 CaseCompare.INSENSITIVE 358 ); 359 } 360 361 /** 362 * MySQL 函数名规则(始终大小写不敏感) 363 * 364 * <p><strong>实际数据库行为(MySQL 8.0+):</strong> 365 * <ul> 366 * <li>函数名/存储过程名始终大小写不敏感 367 * <li>存储时保留原样,比较时不敏感 (与列名相同) 368 * </ul> 369 * 370 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 371 * <ul> 372 * <li>functionCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new preserves case but is INSENSITIVE) 373 * </ul> 374 */ 375 public static IdentifierRules forMySQLRoutine() { 376 return forMySQLColumn(); 377 } 378 379 /** 380 * BigQuery 表名规则(大小写敏感) 381 * 382 * <p><strong>实际数据库行为(BigQuery Standard SQL):</strong> 383 * <ul> 384 * <li>Unquoted: 保留原样,比较敏感 (CREATE TABLE MyTable → stored as MyTable, MyTable!=mytable) 385 * <li>Quoted: 保留原样,比较敏感 (CREATE TABLE `MyTable` → stored as MyTable, MyTable!=mytable) 386 * <li>表名/dataset名/project名都是大小写敏感的</li> 387 * </ul> 388 * 389 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 390 * <ul> 391 * <li>tableCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case and is SENSITIVE) 392 * <li>catalogCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case and is SENSITIVE) 393 * </ul> 394 * 395 * <p><strong>测试用例影响:</strong> 396 * <ul> 397 * <li>✅ 新规则正确:BigQuery 表名确实是大小写敏感的</li> 398 * <li>⚠️ 如果旧测试期望 "MyTable" → "MYTABLE",则测试会失败</li> 399 * <li>⚠️ 应更新测试期望为保留原样 (如 "MyTable" 保持为 "MyTable")</li> 400 * <li>⚠️ 旧代码可能错误地匹配了不同大小写的表名,新代码会正确拒绝</li> 401 * </ul> 402 */ 403 public static IdentifierRules forBigQueryTable() { 404 return new IdentifierRules( 405 CaseFold.NONE, 406 CaseCompare.SENSITIVE, // 表名大小写敏感 407 CaseFold.NONE, 408 CaseCompare.SENSITIVE 409 ); 410 } 411 412 /** 413 * BigQuery 列名规则(大小写不敏感) 414 * 415 * <p><strong>实际数据库行为(BigQuery Standard SQL):</strong> 416 * <ul> 417 * <li>Unquoted: 保留原样,比较不敏感 (SELECT MyColumn → stored as MyColumn, MyColumn=mycolumn=MYCOLUMN) 418 * <li>Quoted: 保留原样,比较不敏感 (SELECT `MyColumn` → MyColumn=mycolumn) 419 * <li>列名是大小写不敏感的(与表名不同)</li> 420 * </ul> 421 * 422 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 423 * <ul> 424 * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new preserves case but is INSENSITIVE) 425 * </ul> 426 * 427 * <p><strong>测试用例影响:</strong> 428 * <ul> 429 * <li>✅ 新规则正确:BigQuery 列名确实是大小写不敏感的</li> 430 * <li>⚠️ 如果旧测试期望 "MyColumn" → "MYCOLUMN",则测试会失败</li> 431 * <li>⚠️ 应更新测试期望为保留原样 (如 "MyColumn" 保持为 "MyColumn")</li> 432 * <li>✅ 但比较时应忽略大小写 (MyColumn = mycolumn = MYCOLUMN)</li> 433 * </ul> 434 */ 435 public static IdentifierRules forBigQueryColumn() { 436 return new IdentifierRules( 437 CaseFold.NONE, 438 CaseCompare.INSENSITIVE, // 列名大小写不敏感 439 CaseFold.NONE, 440 CaseCompare.INSENSITIVE 441 ); 442 } 443 444 /** 445 * DB2 / Netezza / Exasol 标识符规则(与 Oracle 相同) 446 * 447 * <p><strong>实际数据库行为(DB2 11+):</strong> 448 * <ul> 449 * <li>Unquoted: 折叠为大写,比较不敏感 (与 Oracle 相同) 450 * <li>Quoted: 保留原样,比较敏感 451 * </ul> 452 * 453 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 454 * <ul> 455 * <li>DB2 tableCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER) 456 * <li>DB2 catalogCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER) 457 * <li>参见 {@link #forOracle()} 的详细说明 458 * </ul> 459 */ 460 public static IdentifierRules forDB2() { 461 return forOracle(); 462 } 463 464 /** 465 * Snowflake 标识符规则(与 Oracle 相同) 466 * 467 * <p><strong>实际数据库行为(Snowflake):</strong> 468 * <ul> 469 * <li>Unquoted: 折叠为大写,比较不敏感 (与 Oracle 相同) 470 * <li>Quoted: 保留原样,比较敏感 471 * </ul> 472 * 473 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 474 * <ul> 475 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (both fold to UPPER, COMPATIBLE) 476 * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (both fold to UPPER, COMPATIBLE) 477 * <li>参见 {@link #forOracle()} 的详细说明 478 * </ul> 479 */ 480 public static IdentifierRules forSnowflake() { 481 return forOracle(); 482 } 483 484 /** 485 * SAP HANA 标识符规则(与 Oracle 相同) 486 * 487 * <p><strong>实际数据库行为(SAP HANA):</strong> 488 * <ul> 489 * <li>Unquoted: 折叠为大写,比较不敏感 (与 Oracle 相同) 490 * <li>Quoted: 保留原样,比较敏感 491 * </ul> 492 * 493 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 494 * <ul> 495 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (both fold to UPPER, COMPATIBLE) 496 * <li>参见 {@link #forOracle()} 的详细说明 497 * </ul> 498 */ 499 public static IdentifierRules forHANA() { 500 return forOracle(); 501 } 502 503 /** 504 * Presto / Trino 标识符规则 505 * 506 * <p><strong>实际数据库行为(Presto/Trino):</strong> 507 * <ul> 508 * <li>Unquoted: 折叠为小写,比较不敏感 (CREATE TABLE MyTable → stored as mytable) 509 * <li>Quoted: 保留原样,但与 unquoted 规则一致(比较时仍不敏感) 510 * </ul> 511 * 512 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 513 * <ul> 514 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER) 515 * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER) 516 * </ul> 517 * 518 * <p><strong>测试用例影响:</strong> 519 * <ul> 520 * <li>✅ 新规则正确:Presto/Trino 折叠为小写,quoted 标识符与 unquoted 规则一致</li> 521 * <li>⚠️ 如果旧测试期望 "MyTable" → "MYTABLE",则测试会失败</li> 522 * <li>⚠️ 应更新测试期望为小写 (如 "MyTable" → "mytable")</li> 523 * </ul> 524 * 525 * <p><strong>IdentifierRules 配置:</strong> 526 * <ul> 527 * <li>Unquoted: 折叠为小写 ({@link CaseFold#LOWER}), 不敏感 ({@link CaseCompare#INSENSITIVE}) 528 * <li>Quoted: 保留原样 ({@link CaseFold#NONE}), 与 unquoted 一致 ({@link CaseCompare#SAME_AS_UNQUOTED}) 529 * </ul> 530 */ 531 public static IdentifierRules forPresto() { 532 return new IdentifierRules( 533 CaseFold.LOWER, 534 CaseCompare.INSENSITIVE, 535 CaseFold.NONE, 536 CaseCompare.SAME_AS_UNQUOTED // Presto 特性:quoted 与 unquoted 一致 537 ); 538 } 539 540 /** 541 * Vertica 标识符规则(与 Presto 相同) 542 * 543 * <p><strong>实际数据库行为(Vertica):</strong> 544 * <ul> 545 * <li>Unquoted: 折叠为小写,比较不敏感 (与 Presto 相同) 546 * <li>Quoted: 保留原样,但与 unquoted 规则一致 547 * </ul> 548 * 549 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 550 * <ul> 551 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER) 552 * <li>参见 {@link #forPresto()} 的详细说明 553 * </ul> 554 */ 555 public static IdentifierRules forVertica() { 556 return forPresto(); 557 } 558 559 /** 560 * Hive / SparkSQL / Impala 标识符规则(与 PostgreSQL 相同) 561 * 562 * <p><strong>实际数据库行为(Hive 3+, SparkSQL 3+):</strong> 563 * <ul> 564 * <li>Unquoted: 折叠为小写,比较不敏感 (与 PostgreSQL 相同) 565 * <li>Quoted: 保留原样,比较敏感 566 * </ul> 567 * 568 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 569 * <ul> 570 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER) 571 * <li>参见 {@link #forPostgreSQL()} 的详细说明 572 * </ul> 573 */ 574 public static IdentifierRules forHive() { 575 return forPostgreSQL(); 576 } 577 578 /** 579 * Teradata 标识符规则(与 PostgreSQL 相同) 580 * 581 * <p><strong>实际数据库行为(Teradata 16+):</strong> 582 * <ul> 583 * <li>Unquoted: 折叠为小写,比较不敏感 (与 PostgreSQL 相同) 584 * <li>Quoted: 保留原样,比较敏感 585 * </ul> 586 * 587 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 588 * <ul> 589 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER) 590 * <li>参见 {@link #forPostgreSQL()} 的详细说明 591 * </ul> 592 */ 593 public static IdentifierRules forTeradata() { 594 return forPostgreSQL(); 595 } 596 597 /** 598 * Athena 标识符规则(与 Presto 相同) 599 * 600 * <p><strong>实际数据库行为(AWS Athena):</strong> 601 * <ul> 602 * <li>Unquoted: 折叠为小写,比较不敏感 (与 Presto 相同,基于 Trino/Presto) 603 * <li>Quoted: 保留原样,但与 unquoted 规则一致 604 * </ul> 605 * 606 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 607 * <ul> 608 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER) 609 * <li>参见 {@link #forPresto()} 的详细说明 610 * </ul> 611 */ 612 public static IdentifierRules forAthena() { 613 return forPresto(); 614 } 615 616 /** 617 * GaussDB 标识符规则(与 PostgreSQL 相同) 618 * 619 * <p><strong>实际数据库行为(华为 GaussDB):</strong> 620 * <ul> 621 * <li>Unquoted: 折叠为小写,比较不敏感 (与 PostgreSQL 相同,基于 PostgreSQL) 622 * <li>Quoted: 保留原样,比较敏感 623 * </ul> 624 * 625 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 626 * <ul> 627 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER) 628 * <li>参见 {@link #forPostgreSQL()} 的详细说明 629 * </ul> 630 */ 631 public static IdentifierRules forGaussDB() { 632 return forPostgreSQL(); 633 } 634 635 /** 636 * Databricks 标识符规则(与 Hive 相同) 637 * 638 * <p><strong>实际数据库行为(Databricks SQL):</strong> 639 * <ul> 640 * <li>Unquoted: 折叠为小写,比较不敏感 (与 Hive/SparkSQL 相同) 641 * <li>Quoted: 保留原样,比较敏感 642 * </ul> 643 * 644 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 645 * <ul> 646 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER) 647 * <li>参见 {@link #forHive()} 和 {@link #forPostgreSQL()} 的详细说明 648 * </ul> 649 */ 650 public static IdentifierRules forDatabricks() { 651 return forHive(); 652 } 653 654 /** 655 * Apache Flink SQL 的标识符规则(与 Hive/SparkSQL 相同) 656 * 657 * <p><strong>行为说明:</strong> 658 * <ul> 659 * <li>Flink SQL 基于 Apache Calcite,标识符处理与 Hive/SparkSQL 类似 660 * <li>未引用标识符:折叠为小写,比较时忽略大小写 661 * <li>引用标识符(反引号):保留原始大小写,比较时敏感 662 * </ul> 663 * 664 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 665 * <ul> 666 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> 667 * </ul> 668 */ 669 public static IdentifierRules forFlink() { 670 return forHive(); 671 } 672 673 /** 674 * Apache Doris 标识符规则(与 MySQL 相同) 675 * 676 * <p><strong>行为说明:</strong> 677 * <ul> 678 * <li>Doris 是 MySQL 兼容的 OLAP 数据库 679 * <li>标识符处理遵循 MySQL 的规则 680 * <li>表名/数据库名大小写敏感性取决于操作系统(类似 MySQL lower_case_table_names) 681 * <li>列名始终大小写不敏感 682 * </ul> 683 * 684 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 685 * <ul> 686 * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> 687 * <li>catalogCollationCaseSensitive = {@code true} → ✅ <strong>COMPATIBLE</strong> 688 * </ul> 689 * 690 * @param lowerCaseTableNames {@code lower_case_table_names} 值(0, 1, 2),默认使用 1 691 */ 692 public static IdentifierRules forDoris(int lowerCaseTableNames) { 693 return forMySQL(lowerCaseTableNames); 694 } 695 696 /** 697 * Apache Doris 标识符规则(默认配置,相当于 lower_case_table_names=1) 698 * 699 * <p><strong>行为说明:</strong> 700 * <ul> 701 * <li>Doris 是 MySQL 兼容的 OLAP 数据库 702 * <li>默认情况下,表名折叠为小写,比较不敏感 703 * </ul> 704 */ 705 public static IdentifierRules forDoris() { 706 return forMySQL(1); // Default to lowercase folding, case-insensitive 707 } 708 709 /** 710 * Apache Doris 列名规则(与 MySQL 列名相同,始终大小写不敏感) 711 */ 712 public static IdentifierRules forDorisColumn() { 713 return forMySQLColumn(); 714 } 715 716 /** 717 * StarRocks 标识符规则(带参数,相当于 MySQL 的 lower_case_table_names) 718 * 719 * <p><strong>行为说明:</strong> 720 * <ul> 721 * <li>StarRocks 是 Doris 的分支,MySQL 兼容的 OLAP 数据库 722 * <li>与 Doris 和 MySQL 使用相同的标识符规则 723 * </ul> 724 * 725 * @param lowerCaseTableNames {@code lower_case_table_names} 值(0, 1, 2),默认使用 1 726 */ 727 public static IdentifierRules forStarrocks(int lowerCaseTableNames) { 728 return forMySQL(lowerCaseTableNames); 729 } 730 731 /** 732 * StarRocks 标识符规则(默认配置,相当于 lower_case_table_names=1) 733 * 734 * <p><strong>行为说明:</strong> 735 * <ul> 736 * <li>StarRocks 是 Doris 的分支,MySQL 兼容的 OLAP 数据库 737 * <li>默认情况下,表名折叠为小写,比较不敏感 738 * </ul> 739 */ 740 public static IdentifierRules forStarrocks() { 741 return forMySQL(1); // Default to lowercase folding, case-insensitive 742 } 743 744 /** 745 * StarRocks 列名规则(与 MySQL 列名相同,始终大小写不敏感) 746 */ 747 public static IdentifierRules forStarrocksColumn() { 748 return forMySQLColumn(); 749 } 750 751 /** 752 * SQLite 标识符规则(与 PostgreSQL 相同) 753 * 754 * <p><strong>实际数据库行为(SQLite 3.x):</strong> 755 * <ul> 756 * <li>Unquoted: 折叠为小写,比较不敏感 (与 PostgreSQL 相同, SQLite follows "What would PostgreSQL do?") 757 * <li>Quoted: 保留原样,比较敏感 758 * </ul> 759 */ 760 public static IdentifierRules forSqlite() { 761 return forPostgreSQL(); 762 } 763 764 /** 765 * 通用规则(默认:与 PostgreSQL 相同) 766 * 767 * <p><strong>说明:</strong> 768 * <ul> 769 * <li>当数据库类型未知或不在支持列表时使用此规则 770 * <li>默认采用 PostgreSQL 的行为(折叠为小写,比较不敏感) 771 * </ul> 772 * 773 * <p><strong>与 Legacy TSQLEnv 兼容性:</strong> 774 * <ul> 775 * <li>defaultCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER) 776 * <li>参见 {@link #forPostgreSQL()} 的详细说明 777 * </ul> 778 */ 779 public static IdentifierRules forGeneric() { 780 return forPostgreSQL(); 781 } 782 783 // ===== toString 方法(用于调试) ===== 784 785 @Override 786 public String toString() { 787 return String.format("IdentifierRules{unquoted=%s/%s, quoted=%s/%s}", 788 unquotedFold, unquotedCompare, quotedFold, quotedCompare); 789 } 790}