Source code

001package gudusoft.gsqlparser.sqlenv;
002
import java.util.Locale;
import java.util.Objects;
004
/**
 * Identifier rule quadruple (per vendor, per object group).
 *
 * <p>Describes a database vendor's identifier case rules, distinguishing how quoted and
 * unquoted identifiers are folded and compared.
 *
 * <p>Design source: dbobject_search.md (senior designer proposal).
 *
 * <p>Instances are immutable (all four fields are final enum values) and have value
 * semantics: two instances are equal when their four components are equal.
 *
 * <p>Usage example:
 * <pre>
 * // Oracle: unquoted folds to upper case and is insensitive, quoted is preserved and sensitive
 * IdentifierRules oracleRules = IdentifierRules.forOracle();
 *
 * // Couchbase: everything is case-sensitive
 * IdentifierRules couchbaseRules = IdentifierRules.forCouchbase();
 * </pre>
 *
 * @since 3.1.0.9
 */
public final class IdentifierRules {

    // ===== Quadruple definition =====

    /**
     * Case-folding rule for unquoted identifiers.
     */
    public final CaseFold unquotedFold;

    /**
     * Case-comparison rule for unquoted identifiers.
     */
    public final CaseCompare unquotedCompare;

    /**
     * Case-folding rule for quoted identifiers (usually NONE, i.e. preserved as written).
     */
    public final CaseFold quotedFold;

    /**
     * Case-comparison rule for quoted identifiers.
     */
    public final CaseCompare quotedCompare;

    // ===== Enum definitions =====

    /**
     * Case-folding rule.
     *
     * <p>Determines how the case of an identifier is normalized.
     */
    public enum CaseFold {
        /**
         * No conversion (preserved as written).
         * <p>Examples: Couchbase unquoted, all quoted identifiers
         */
        NONE,

        /**
         * Convert to upper case.
         * <p>Examples: Oracle unquoted, DB2 unquoted, Snowflake unquoted
         */
        UPPER,

        /**
         * Convert to lower case.
         * <p>Examples: PostgreSQL unquoted, Redshift unquoted, Greenplum unquoted
         */
        LOWER
    }

    /**
     * Case-comparison rule.
     *
     * <p>Determines how two identifiers are compared for equality.
     */
    public enum CaseCompare {
        /**
         * Case-sensitive.
         * <p>Examples: all Couchbase identifiers, all quoted identifiers
         */
        SENSITIVE,

        /**
         * Case-insensitive.
         * <p>Examples: Oracle unquoted, MySQL unquoted
         */
        INSENSITIVE,

        /**
         * Collation-based (decided at run time).
         * <p>Examples: SQL Server, Azure SQL
         * <p>Comparison needs to be done with a {@link java.text.Collator}
         */
        COLLATION_BASED,

        /**
         * Same as the unquoted rule.
         * <p>Examples: Presto quoted identifiers, Vertica quoted identifiers
         * <p>Resolution must fall back to unquotedCompare
         */
        SAME_AS_UNQUOTED
    }

    // ===== Constructor =====

    /**
     * Creates an identifier rule set.
     *
     * @param unquotedFold folding rule for unquoted identifiers
     * @param unquotedCompare comparison rule for unquoted identifiers
     * @param quotedFold folding rule for quoted identifiers
     * @param quotedCompare comparison rule for quoted identifiers
     * @throws NullPointerException if any argument is {@code null}
     */
    public IdentifierRules(CaseFold unquotedFold, CaseCompare unquotedCompare,
                           CaseFold quotedFold, CaseCompare quotedCompare) {
        this.unquotedFold = Objects.requireNonNull(unquotedFold, "unquotedFold");
        this.unquotedCompare = Objects.requireNonNull(unquotedCompare, "unquotedCompare");
        this.quotedFold = Objects.requireNonNull(quotedFold, "quotedFold");
        this.quotedCompare = Objects.requireNonNull(quotedCompare, "quotedCompare");
    }

    // ===== Cached shared instances =====
    // Instances are immutable, so every factory method hands out one of these constants
    // instead of allocating a new object per call.

    private static final IdentifierRules ORACLE = new IdentifierRules(
        CaseFold.UPPER, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    private static final IdentifierRules POSTGRESQL = new IdentifierRules(
        CaseFold.LOWER, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    private static final IdentifierRules COUCHBASE = new IdentifierRules(
        CaseFold.NONE, CaseCompare.SENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    private static final IdentifierRules PRESTO = new IdentifierRules(
        CaseFold.LOWER, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.SAME_AS_UNQUOTED);

    private static final IdentifierRules BIGQUERY_TABLE = new IdentifierRules(
        CaseFold.NONE, CaseCompare.SENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    private static final IdentifierRules BIGQUERY_COLUMN = new IdentifierRules(
        CaseFold.NONE, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.INSENSITIVE);

    private static final IdentifierRules MYSQL_COLUMN = new IdentifierRules(
        CaseFold.NONE, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.INSENSITIVE);

    /** SQL Server with a case-insensitive (or unspecified) collation. */
    private static final IdentifierRules SQLSERVER_COLLATION_BASED = new IdentifierRules(
        CaseFold.NONE, CaseCompare.COLLATION_BASED, CaseFold.NONE, CaseCompare.COLLATION_BASED);

    /** SQL Server with a case-sensitive or binary collation. */
    private static final IdentifierRules SQLSERVER_CASE_SENSITIVE = new IdentifierRules(
        CaseFold.NONE, CaseCompare.SENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    /** MySQL table names with lower_case_table_names = 0. */
    private static final IdentifierRules MYSQL_TABLE_MODE_0 = new IdentifierRules(
        CaseFold.NONE, CaseCompare.SENSITIVE, CaseFold.NONE, CaseCompare.SENSITIVE);

    /** MySQL table names with lower_case_table_names = 1. */
    private static final IdentifierRules MYSQL_TABLE_MODE_1 = new IdentifierRules(
        CaseFold.LOWER, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.INSENSITIVE);

    /** MySQL table names with lower_case_table_names = 2 (also used for any other value). */
    private static final IdentifierRules MYSQL_TABLE_MODE_2 = new IdentifierRules(
        CaseFold.NONE, CaseCompare.INSENSITIVE, CaseFold.NONE, CaseCompare.INSENSITIVE);

    /** Snowflake with QUOTED_IDENTIFIERS_IGNORE_CASE = TRUE. */
    private static final IdentifierRules SNOWFLAKE_QUOTED_IGNORE_CASE = new IdentifierRules(
        CaseFold.UPPER, CaseCompare.INSENSITIVE, CaseFold.UPPER, CaseCompare.INSENSITIVE);

    // ===== Preset rule factory methods =====

    /**
     * Oracle identifier rules.
     *
     * <p><strong>Actual database behavior (Oracle 12c+):</strong>
     * <ul>
     * <li>Unquoted: folded to upper case, compared case-insensitively (CREATE TABLE foo → stored as FOO, foo=FOO=Foo)
     * <li>Quoted: preserved as written, compared case-sensitively (CREATE TABLE "foo" → stored as foo, "foo"!="FOO")
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>columnCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER)
     * <li>functionCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER)
     * <li>tableCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER)
     * <li>catalogCollationCaseSensitive = {@code false} → ✅ <strong>COMPATIBLE</strong> (both fold to UPPER)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     * <li>✅ The new rule is correct: Oracle does fold unquoted identifiers to upper case</li>
     * <li>⚠️ Legacy tests that expect the original case to be preserved (e.g. "myTable" stays "myTable") will fail</li>
     * <li>⚠️ Test expectations should be updated to upper case (e.g. "myTable" → "MYTABLE")</li>
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     * <li>Unquoted: folded to upper case ({@link CaseFold#UPPER}), case-insensitive comparison ({@link CaseCompare#INSENSITIVE})
     * <li>Quoted: preserved as written ({@link CaseFold#NONE}), case-sensitive comparison ({@link CaseCompare#SENSITIVE})
     * </ul>
     *
     * @return the shared Oracle rule instance
     */
    public static IdentifierRules forOracle() {
        return ORACLE;
    }

    /**
     * Dameng (达梦) identifier rules (same as Oracle).
     *
     * <p>Dameng follows Oracle identifier conventions:
     * <ul>
     * <li>Unquoted: folded to upper case, case-insensitive comparison
     * <li>Quoted: preserved as written, case-sensitive comparison
     * </ul>
     *
     * @return the same instance as {@link #forOracle()}
     */
    public static IdentifierRules forDameng() {
        return forOracle();
    }

    /**
     * PostgreSQL / Redshift / Greenplum identifier rules.
     *
     * <p><strong>Actual database behavior (PostgreSQL 12+):</strong>
     * <ul>
     * <li>Unquoted: folded to lower case, compared case-insensitively (CREATE TABLE MyTable → stored as mytable, MyTable=mytable=MYTABLE)
     * <li>Quoted: preserved as written, compared case-sensitively (CREATE TABLE "MyTable" → stored as MyTable, "MyTable"!="mytable")
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     * <li>functionCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to LOWER)
     * <li>tableCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to LOWER)
     * <li>catalogCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to LOWER)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     * <li>✅ The new rule is correct: PostgreSQL does fold unquoted identifiers to lower case</li>
     * <li>⚠️ Legacy tests that expect upper case (e.g. "MyTable" → "MYTABLE") will fail</li>
     * <li>⚠️ Test expectations should be updated to lower case (e.g. "MyTable" → "mytable")</li>
     * <li>⚠️ Legacy tests that expect the original case to be preserved will also fail</li>
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     * <li>Unquoted: folded to lower case ({@link CaseFold#LOWER}), case-insensitive comparison ({@link CaseCompare#INSENSITIVE})
     * <li>Quoted: preserved as written ({@link CaseFold#NONE}), case-sensitive comparison ({@link CaseCompare#SENSITIVE})
     * </ul>
     *
     * @return the shared PostgreSQL rule instance
     */
    public static IdentifierRules forPostgreSQL() {
        return POSTGRESQL;
    }

    /**
     * Couchbase N1QL identifier rules.
     *
     * <p><strong>Actual database behavior (Couchbase N1QL):</strong>
     * <ul>
     * <li>Unquoted: preserved as written, case-sensitive comparison
     * <li>Quoted: preserved as written, case-sensitive comparison
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case)
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     * <li>Unquoted: preserved as written ({@link CaseFold#NONE}), case-sensitive comparison ({@link CaseCompare#SENSITIVE})
     * <li>Quoted: preserved as written ({@link CaseFold#NONE}), case-sensitive comparison ({@link CaseCompare#SENSITIVE})
     * </ul>
     *
     * @return the shared Couchbase rule instance
     */
    public static IdentifierRules forCouchbase() {
        return COUCHBASE;
    }

    /**
     * SQL Server / Azure SQL identifier rules.
     *
     * <p><strong>Actual database behavior (SQL Server 2019+):</strong>
     * <ul>
     * <li>Unquoted: preserved as written, comparison decided by the collation (CREATE TABLE MyTable → stored as MyTable)
     * <li>Quoted: preserved as written, comparison decided by the collation (CREATE TABLE [MyTable] → stored as MyTable)
     * <li>Default collation (SQL_Latin1_General_CP1_CI_AS): case-insensitive (MyTable=mytable=MYTABLE)
     * <li>CS collation (SQL_Latin1_General_CP1_CS_AS): case-sensitive (MyTable!=mytable)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>columnCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case)
     * <li>functionCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case)
     * <li>tableCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case)
     * <li>catalogCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     * <li>✅ The new rule is correct: SQL Server preserves the original case of identifiers and compares them using the collation</li>
     * <li>⚠️ Legacy tests that expect "MyTable" → "MYTABLE" will fail</li>
     * <li>⚠️ Test expectations should be updated to preserve case (e.g. "MyTable" stays "MyTable")</li>
     * <li>⚠️ This is the root cause of the processId changes in the dataflow tests!</li>
     * <li>📝 Reference: investigation_findings_2025_10_20.md</li>
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     * <li>Unquoted: no folding ({@link CaseFold#NONE}), collation-based comparison ({@link CaseCompare#COLLATION_BASED})
     * <li>Quoted: no folding ({@link CaseFold#NONE}), collation-based comparison ({@link CaseCompare#COLLATION_BASED})
     * <li>Must be used together with {@link CollatorProvider}; the default is SQL_Latin1_General_CP1_CI_AS (case-insensitive)
     * </ul>
     *
     * <p><strong>Note:</strong> SQL Server case behavior is decided entirely by the collation and cannot be
     * modelled by simple folding.
     *
     * @return the rules for the default (case-insensitive) collation
     */
    public static IdentifierRules forSQLServer() {
        return forSQLServer(null);
    }

    /**
     * SQL Server / Azure SQL identifier rules (case comparison decided by the collation).
     *
     * <p>SQL Server case behavior is decided entirely by the collation:
     * <ul>
     * <li>CI collation (e.g. SQL_Latin1_General_CP1_CI_AS): case-insensitive
     * <li>CS collation (e.g. SQL_Latin1_General_CP1_CS_AS): case-sensitive
     * <li>Binary collation (e.g. Latin1_General_BIN, Latin1_General_100_BIN2_UTF8): case-sensitive
     * </ul>
     *
     * <p>Both unquoted and quoted identifiers keep their original case ({@code CaseFold.NONE}).
     * A collation recognised as case-sensitive yields {@link CaseCompare#SENSITIVE} for both;
     * every other collation yields {@link CaseCompare#COLLATION_BASED}.
     *
     * @param collation SQL Server collation name (e.g. "SQL_Latin1_General_CP1_CS_AS");
     *                  {@code null}, empty or blank selects the default (case-insensitive)
     * @return the shared rule instance matching the collation's case sensitivity
     */
    public static IdentifierRules forSQLServer(String collation) {
        return isCaseSensitiveCollation(collation)
            ? SQLSERVER_CASE_SENSITIVE
            : SQLSERVER_COLLATION_BASED;
    }

    /**
     * Checks whether a SQL Server collation name denotes a case-sensitive collation.
     *
     * <p>SQL Server collation naming: a "_CS" token means case-sensitive and "_CI" means
     * case-insensitive. Binary collations ("_BIN" / "_BIN2" tokens) carry neither token but
     * compare by code point and are therefore case-sensitive as well.
     *
     * @param collation collation name, may be {@code null}
     * @return {@code true} if the collation is case-sensitive
     */
    private static boolean isCaseSensitiveCollation(String collation) {
        if (collation == null) {
            return false;  // default is case-insensitive
        }
        // Locale.ROOT keeps the result independent of the JVM default locale
        // (e.g. under a Turkish locale "bin" would otherwise upper-case to "BİN").
        String upper = collation.trim().toUpperCase(Locale.ROOT);
        if (upper.isEmpty()) {
            return false;  // default is case-insensitive
        }
        return hasCollationToken(upper, "_CS")
            || hasCollationToken(upper, "_BIN")
            || hasCollationToken(upper, "_BIN2");
    }

    /**
     * Tells whether {@code token} appears in the collation name either followed by another
     * "_"-separated part or as the final part of the name.
     */
    private static boolean hasCollationToken(String upperCollation, String token) {
        return upperCollation.contains(token + "_") || upperCollation.endsWith(token);
    }

    /**
     * MySQL identifier rules (table/schema names).
     *
     * <p><strong>Actual database behavior (MySQL 8.0+):</strong>
     * <p>Decided by the {@code lower_case_table_names} system variable:
     * <ul>
     * <li>0 (Unix/Linux): case-sensitive, preserved as written (CREATE TABLE MyTable → stored as MyTable, MyTable!=mytable)
     * <li>1 (Windows): stored in lower case, case-insensitive comparison (CREATE TABLE MyTable → stored as mytable, MyTable=mytable=MYTABLE)
     * <li>2 (macOS): stored as written, case-insensitive comparison (CREATE TABLE MyTable → stored as MyTable, MyTable=mytable=MYTABLE)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new doesn't fold or folds to LOWER)
     * <li>functionCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new doesn't fold)
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new behavior depends on lower_case_table_names)
     * <li>catalogCollationCaseSensitive = {@code true} → ⚠️ <strong>PARTIAL</strong> (legacy preserved case, new is insensitive)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     * <li>✅ The new rule is correct: MySQL behavior does depend on the lower_case_table_names setting</li>
     * <li>⚠️ Legacy tests that expect "MyTable" → "MYTABLE" will fail in modes 1/2</li>
     * <li>⚠️ Mode 0: expect case to be preserved (e.g. "MyTable" stays "MyTable"), case-sensitive</li>
     * <li>⚠️ Mode 1: expect lower case (e.g. "MyTable" → "mytable"), case-insensitive</li>
     * <li>⚠️ Mode 2: expect case to be preserved (e.g. "MyTable" stays "MyTable"), case-insensitive</li>
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     * <li>Mode 0: no folding ({@link CaseFold#NONE}), sensitive ({@link CaseCompare#SENSITIVE})
     * <li>Mode 1: folded to lower case ({@link CaseFold#LOWER}), insensitive ({@link CaseCompare#INSENSITIVE})
     * <li>Mode 2: no folding ({@link CaseFold#NONE}), insensitive ({@link CaseCompare#INSENSITIVE})
     * <li>Quoted: preserved as written, but also insensitive in modes 1/2 (MySQL-specific behavior)
     * </ul>
     *
     * @param lowerCaseTableNames value of {@code lower_case_table_names} (0, 1, 2); any value
     *                            other than 0 or 1 is treated as 2
     * @return the shared rule instance for the given mode
     */
    public static IdentifierRules forMySQL(int lowerCaseTableNames) {
        switch (lowerCaseTableNames) {
            case 0:
                // Unix/Linux: case-sensitive
                return MYSQL_TABLE_MODE_0;
            case 1:
                // Windows: folded to lower case, case-insensitive (quoted too)
                return MYSQL_TABLE_MODE_1;
            default:
                // macOS (and any unrecognised value): preserved, case-insensitive (quoted too)
                return MYSQL_TABLE_MODE_2;
        }
    }

    /**
     * MySQL column-name rules (always case-insensitive).
     *
     * <p><strong>Actual database behavior (MySQL 8.0+):</strong>
     * <ul>
     * <li>Column names are always case-insensitive and are not affected by lower_case_table_names
     * <li>Stored as written, compared case-insensitively (SELECT MyColumn → stored as MyColumn, MyColumn=mycolumn)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new preserves case but is INSENSITIVE)
     * </ul>
     *
     * @return the shared MySQL column rule instance
     */
    public static IdentifierRules forMySQLColumn() {
        return MYSQL_COLUMN;
    }

    /**
     * MySQL routine-name rules (always case-insensitive).
     *
     * <p><strong>Actual database behavior (MySQL 8.0+):</strong>
     * <ul>
     * <li>Function / stored-procedure names are always case-insensitive
     * <li>Stored as written, compared case-insensitively (same as column names)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>functionCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new preserves case but is INSENSITIVE)
     * </ul>
     *
     * @return the same instance as {@link #forMySQLColumn()}
     */
    public static IdentifierRules forMySQLRoutine() {
        return forMySQLColumn();
    }

    /**
     * BigQuery table-name rules (case-sensitive).
     *
     * <p><strong>Actual database behavior (BigQuery Standard SQL):</strong>
     * <ul>
     * <li>Unquoted: preserved as written, case-sensitive comparison (CREATE TABLE MyTable → stored as MyTable, MyTable!=mytable)
     * <li>Quoted: preserved as written, case-sensitive comparison (CREATE TABLE `MyTable` → stored as MyTable, MyTable!=mytable)
     * <li>Table, dataset and project names are all case-sensitive</li>
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case and is SENSITIVE)
     * <li>catalogCollationCaseSensitive = {@code false} → ❌ <strong>INCOMPATIBLE</strong> (legacy folded to UPPER, new preserves case and is SENSITIVE)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     * <li>✅ The new rule is correct: BigQuery table names are indeed case-sensitive</li>
     * <li>⚠️ Legacy tests that expect "MyTable" → "MYTABLE" will fail</li>
     * <li>⚠️ Test expectations should be updated to preserve case (e.g. "MyTable" stays "MyTable")</li>
     * <li>⚠️ Legacy code may have wrongly matched table names that differ only in case; the new code correctly rejects them</li>
     * </ul>
     *
     * @return the shared BigQuery table rule instance
     */
    public static IdentifierRules forBigQueryTable() {
        return BIGQUERY_TABLE;
    }

    /**
     * BigQuery column-name rules (case-insensitive).
     *
     * <p><strong>Actual database behavior (BigQuery Standard SQL):</strong>
     * <ul>
     * <li>Unquoted: preserved as written, case-insensitive comparison (SELECT MyColumn → stored as MyColumn, MyColumn=mycolumn=MYCOLUMN)
     * <li>Quoted: preserved as written, case-insensitive comparison (SELECT `MyColumn` → MyColumn=mycolumn)
     * <li>Column names are case-insensitive (unlike table names)</li>
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new preserves case but is INSENSITIVE)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     * <li>✅ The new rule is correct: BigQuery column names are indeed case-insensitive</li>
     * <li>⚠️ Legacy tests that expect "MyColumn" → "MYCOLUMN" will fail</li>
     * <li>⚠️ Test expectations should be updated to preserve case (e.g. "MyColumn" stays "MyColumn")</li>
     * <li>✅ Comparison should still ignore case (MyColumn = mycolumn = MYCOLUMN)</li>
     * </ul>
     *
     * @return the shared BigQuery column rule instance
     */
    public static IdentifierRules forBigQueryColumn() {
        return BIGQUERY_COLUMN;
    }

    /**
     * DB2 / Netezza / Exasol identifier rules (same as Oracle).
     *
     * <p><strong>Actual database behavior (DB2 11+):</strong>
     * <ul>
     * <li>Unquoted: folded to upper case, case-insensitive comparison (same as Oracle)
     * <li>Quoted: preserved as written, case-sensitive comparison
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>DB2 tableCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER)
     * <li>DB2 catalogCollationCaseSensitive = {@code true} → ❌ <strong>INCOMPATIBLE</strong> (legacy preserved case, new folds to UPPER)
     * <li>See {@link #forOracle()} for details
     * </ul>
     *
     * @return the same instance as {@link #forOracle()}
     */
    public static IdentifierRules forDB2() {
        return forOracle();
    }

    /**
     * Snowflake identifier rules (same as Oracle).
     *
     * <p><strong>Actual database behavior (Snowflake):</strong>
     * <ul>
     * <li>Unquoted: folded to upper case, case-insensitive comparison (same as Oracle)
     * <li>Quoted: preserved as written, case-sensitive comparison
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (both fold to UPPER, COMPATIBLE)
     * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (both fold to UPPER, COMPATIBLE)
     * <li>See {@link #forOracle()} for details
     * </ul>
     *
     * @return the rules for QUOTED_IDENTIFIERS_IGNORE_CASE = FALSE
     */
    public static IdentifierRules forSnowflake() {
        return forSnowflake(false);
    }

    /**
     * Snowflake identifier rules (decided by QUOTED_IDENTIFIERS_IGNORE_CASE).
     *
     * <p>Snowflake default behavior:
     * <ul>
     * <li>Unquoted: folded to upper case, case-insensitive comparison (same as Oracle)
     * <li>Quoted: preserved as written, case-sensitive comparison
     * </ul>
     *
     * <p>When QUOTED_IDENTIFIERS_IGNORE_CASE = TRUE:
     * <ul>
     * <li>Quoted identifiers are also folded to upper case and compared case-insensitively
     * </ul>
     *
     * @param quotedIdentifiersIgnoreCase the Snowflake QUOTED_IDENTIFIERS_IGNORE_CASE parameter
     * @return the shared rule instance for the given setting
     */
    public static IdentifierRules forSnowflake(boolean quotedIdentifiersIgnoreCase) {
        // TRUE: quoted identifiers fold to upper case and are insensitive too;
        // FALSE (default): quoted identifiers are preserved and sensitive, exactly like Oracle.
        return quotedIdentifiersIgnoreCase ? SNOWFLAKE_QUOTED_IGNORE_CASE : forOracle();
    }

    /**
     * SAP HANA identifier rules (same as Oracle).
     *
     * <p><strong>Actual database behavior (SAP HANA):</strong>
     * <ul>
     * <li>Unquoted: folded to upper case, case-insensitive comparison (same as Oracle)
     * <li>Quoted: preserved as written, case-sensitive comparison
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (both fold to UPPER, COMPATIBLE)
     * <li>See {@link #forOracle()} for details
     * </ul>
     *
     * @return the same instance as {@link #forOracle()}
     */
    public static IdentifierRules forHANA() {
        return forOracle();
    }

    /**
     * Presto / Trino identifier rules.
     *
     * <p><strong>Actual database behavior (Presto/Trino):</strong>
     * <ul>
     * <li>Unquoted: folded to lower case, case-insensitive comparison (CREATE TABLE MyTable → stored as mytable)
     * <li>Quoted: preserved as written, but follows the unquoted rule (comparison is still insensitive)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     * <li>columnCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     * </ul>
     *
     * <p><strong>Impact on test cases:</strong>
     * <ul>
     * <li>✅ The new rule is correct: Presto/Trino folds to lower case, and quoted identifiers follow the unquoted rule</li>
     * <li>⚠️ Legacy tests that expect "MyTable" → "MYTABLE" will fail</li>
     * <li>⚠️ Test expectations should be updated to lower case (e.g. "MyTable" → "mytable")</li>
     * </ul>
     *
     * <p><strong>IdentifierRules configuration:</strong>
     * <ul>
     * <li>Unquoted: folded to lower case ({@link CaseFold#LOWER}), insensitive ({@link CaseCompare#INSENSITIVE})
     * <li>Quoted: preserved as written ({@link CaseFold#NONE}), same as unquoted ({@link CaseCompare#SAME_AS_UNQUOTED})
     * </ul>
     *
     * @return the shared Presto rule instance
     */
    public static IdentifierRules forPresto() {
        return PRESTO;
    }

    /**
     * Vertica identifier rules (same as Presto).
     *
     * <p><strong>Actual database behavior (Vertica):</strong>
     * <ul>
     * <li>Unquoted: folded to lower case, case-insensitive comparison (same as Presto)
     * <li>Quoted: preserved as written, but follows the unquoted rule
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     * <li>See {@link #forPresto()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPresto()}
     */
    public static IdentifierRules forVertica() {
        return forPresto();
    }

    /**
     * Hive / SparkSQL / Impala identifier rules (same as PostgreSQL).
     *
     * <p><strong>Actual database behavior (Hive 3+, SparkSQL 3+):</strong>
     * <ul>
     * <li>Unquoted: folded to lower case, case-insensitive comparison (same as PostgreSQL)
     * <li>Quoted: preserved as written, case-sensitive comparison
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     * <li>See {@link #forPostgreSQL()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPostgreSQL()}
     */
    public static IdentifierRules forHive() {
        return forPostgreSQL();
    }

    /**
     * Teradata identifier rules (same as PostgreSQL).
     *
     * <p><strong>Actual database behavior (Teradata 16+):</strong>
     * <ul>
     * <li>Unquoted: folded to lower case, case-insensitive comparison (same as PostgreSQL)
     * <li>Quoted: preserved as written, case-sensitive comparison
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     * <li>See {@link #forPostgreSQL()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPostgreSQL()}
     */
    public static IdentifierRules forTeradata() {
        return forPostgreSQL();
    }

    /**
     * Athena identifier rules (same as Presto).
     *
     * <p><strong>Actual database behavior (AWS Athena):</strong>
     * <ul>
     * <li>Unquoted: folded to lower case, case-insensitive comparison (same as Presto; based on Trino/Presto)
     * <li>Quoted: preserved as written, but follows the unquoted rule
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     * <li>See {@link #forPresto()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPresto()}
     */
    public static IdentifierRules forAthena() {
        return forPresto();
    }

    /**
     * GaussDB identifier rules (same as PostgreSQL).
     *
     * <p><strong>Actual database behavior (Huawei GaussDB):</strong>
     * <ul>
     * <li>Unquoted: folded to lower case, case-insensitive comparison (same as PostgreSQL; based on PostgreSQL)
     * <li>Quoted: preserved as written, case-sensitive comparison
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     * <li>See {@link #forPostgreSQL()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPostgreSQL()}
     */
    public static IdentifierRules forGaussDB() {
        return forPostgreSQL();
    }

    /**
     * Databricks identifier rules (same as Hive).
     *
     * <p><strong>Actual database behavior (Databricks SQL):</strong>
     * <ul>
     * <li>Unquoted: folded to lower case, case-insensitive comparison (same as Hive/SparkSQL)
     * <li>Quoted: preserved as written, case-sensitive comparison
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     * <li>See {@link #forHive()} and {@link #forPostgreSQL()} for details
     * </ul>
     *
     * @return the same instance as {@link #forHive()}
     */
    public static IdentifierRules forDatabricks() {
        return forHive();
    }

    /**
     * Apache Flink SQL identifier rules (same as Hive/SparkSQL).
     *
     * <p><strong>Behavior:</strong>
     * <ul>
     * <li>Flink SQL is based on Apache Calcite; identifier handling is similar to Hive/SparkSQL
     * <li>Unquoted identifiers: folded to lower case, compared ignoring case
     * <li>Quoted identifiers (backticks): original case preserved, compared case-sensitively
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong>
     * </ul>
     *
     * @return the same instance as {@link #forHive()}
     */
    public static IdentifierRules forFlink() {
        return forHive();
    }

    /**
     * Apache Doris identifier rules (same as MySQL).
     *
     * <p><strong>Behavior:</strong>
     * <ul>
     * <li>Doris is a MySQL-compatible OLAP database
     * <li>Identifier handling follows the MySQL rules
     * <li>Table/database name case sensitivity depends on the operating system (similar to MySQL lower_case_table_names)
     * <li>Column names are always case-insensitive
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>tableCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong>
     * <li>catalogCollationCaseSensitive = {@code true} → ✅ <strong>COMPATIBLE</strong>
     * </ul>
     *
     * @param lowerCaseTableNames value of {@code lower_case_table_names} (0, 1, 2); the
     *                            no-argument overload {@link #forDoris()} uses 1
     * @return the same rules as {@link #forMySQL(int)} for that value
     */
    public static IdentifierRules forDoris(int lowerCaseTableNames) {
        return forMySQL(lowerCaseTableNames);
    }

    /**
     * Apache Doris identifier rules (default configuration, equivalent to lower_case_table_names=1).
     *
     * <p><strong>Behavior:</strong>
     * <ul>
     * <li>Doris is a MySQL-compatible OLAP database
     * <li>By default, table names are folded to lower case and compared case-insensitively
     * </ul>
     *
     * @return the same rules as {@code forMySQL(1)}
     */
    public static IdentifierRules forDoris() {
        return forMySQL(1);  // Default to lowercase folding, case-insensitive
    }

    /**
     * Apache Doris column-name rules (same as MySQL column names, always case-insensitive).
     *
     * @return the same instance as {@link #forMySQLColumn()}
     */
    public static IdentifierRules forDorisColumn() {
        return forMySQLColumn();
    }

    /**
     * StarRocks identifier rules (parameterized, equivalent to MySQL lower_case_table_names).
     *
     * <p><strong>Behavior:</strong>
     * <ul>
     * <li>StarRocks is a fork of Doris, a MySQL-compatible OLAP database
     * <li>Uses the same identifier rules as Doris and MySQL
     * </ul>
     *
     * @param lowerCaseTableNames value of {@code lower_case_table_names} (0, 1, 2); the
     *                            no-argument overload {@link #forStarrocks()} uses 1
     * @return the same rules as {@link #forMySQL(int)} for that value
     */
    public static IdentifierRules forStarrocks(int lowerCaseTableNames) {
        return forMySQL(lowerCaseTableNames);
    }

    /**
     * StarRocks identifier rules (default configuration, equivalent to lower_case_table_names=1).
     *
     * <p><strong>Behavior:</strong>
     * <ul>
     * <li>StarRocks is a fork of Doris, a MySQL-compatible OLAP database
     * <li>By default, table names are folded to lower case and compared case-insensitively
     * </ul>
     *
     * @return the same rules as {@code forMySQL(1)}
     */
    public static IdentifierRules forStarrocks() {
        return forMySQL(1);  // Default to lowercase folding, case-insensitive
    }

    /**
     * StarRocks column-name rules (same as MySQL column names, always case-insensitive).
     *
     * @return the same instance as {@link #forMySQLColumn()}
     */
    public static IdentifierRules forStarrocksColumn() {
        return forMySQLColumn();
    }

    /**
     * SQLite identifier rules (same as PostgreSQL).
     *
     * <p><strong>Actual database behavior (SQLite 3.x):</strong>
     * <ul>
     * <li>Unquoted: folded to lower case, case-insensitive comparison (same as PostgreSQL, SQLite follows "What would PostgreSQL do?")
     * <li>Quoted: preserved as written, case-sensitive comparison
     * </ul>
     *
     * @return the same instance as {@link #forPostgreSQL()}
     */
    public static IdentifierRules forSqlite() {
        return forPostgreSQL();
    }

    /**
     * Generic rules (default: same as PostgreSQL).
     *
     * <p><strong>Notes:</strong>
     * <ul>
     * <li>Used when the database type is unknown or not in the supported list
     * <li>Defaults to the PostgreSQL behavior (folded to lower case, case-insensitive comparison)
     * </ul>
     *
     * <p><strong>Compatibility with legacy TSQLEnv:</strong>
     * <ul>
     * <li>defaultCollationCaseSensitive = {@code false} → ⚠️ <strong>PARTIAL</strong> (legacy folded to UPPER, new folds to LOWER)
     * <li>See {@link #forPostgreSQL()} for details
     * </ul>
     *
     * @return the same instance as {@link #forPostgreSQL()}
     */
    public static IdentifierRules forGeneric() {
        return forPostgreSQL();
    }

    // ===== Value semantics =====

    /**
     * Two rule sets are equal when all four components are identical.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        IdentifierRules that = (IdentifierRules) o;
        return unquotedFold == that.unquotedFold
            && unquotedCompare == that.unquotedCompare
            && quotedFold == that.quotedFold
            && quotedCompare == that.quotedCompare;
    }

    /**
     * Hash code derived from the four components, consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return Objects.hash(unquotedFold, unquotedCompare, quotedFold, quotedCompare);
    }

    // ===== toString (for debugging) =====

    /**
     * Returns a debug representation of the form
     * {@code IdentifierRules{unquoted=FOLD/COMPARE, quoted=FOLD/COMPARE}}.
     */
    @Override
    public String toString() {
        return String.format("IdentifierRules{unquoted=%s/%s, quoted=%s/%s}",
            unquotedFold, unquotedCompare, quotedFold, quotedCompare);
    }
}