Source code

001package gudusoft.gsqlparser.catalog.input;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.catalog.diagnostic.CatalogDiagnostic;
005import gudusoft.gsqlparser.catalog.diagnostic.CatalogDiagnosticCode;
006import gudusoft.gsqlparser.catalog.diagnostic.CatalogDiagnosticSeverity;
007import gudusoft.gsqlparser.catalog.input.model.CatalogModel;
008import gudusoft.gsqlparser.catalog.input.model.ColumnModel;
009import gudusoft.gsqlparser.catalog.input.model.ConstraintModel;
010import gudusoft.gsqlparser.catalog.input.model.IdentifierConfig;
011import gudusoft.gsqlparser.catalog.input.model.IndexModel;
012import gudusoft.gsqlparser.catalog.input.model.RoutineModel;
013import gudusoft.gsqlparser.catalog.input.model.SchemaModel;
014import gudusoft.gsqlparser.catalog.input.model.SequenceModel;
015import gudusoft.gsqlparser.catalog.input.model.SynonymModel;
016import gudusoft.gsqlparser.catalog.input.model.TableModel;
017import gudusoft.gsqlparser.catalog.input.model.UnifiedCatalogModel;
018import gudusoft.gsqlparser.catalog.input.model.ViewModel;
019import gudusoft.gsqlparser.catalog.runtime.CatalogIdentifierPolicy;
020import gudusoft.gsqlparser.catalog.runtime.CatalogObjectKind;
021import gudusoft.gsqlparser.sqlenv.ESQLDataObjectType;
022import gudusoft.gsqlparser.sqlenv.IdentifierService;
023import gudusoft.gsqlparser.sqlenv.TSQLEnv;
024
025import java.util.ArrayList;
026import java.util.HashMap;
027import java.util.LinkedHashMap;
028import java.util.List;
029import java.util.Map;
030
031/**
032 * Validates a {@link UnifiedCatalogModel} against required-field rules, duplicate-name
033 * checks, identifier-bypass detection, and table↔column referential integrity.
034 *
035 * <p>Plan §7.1 / §9.6. Identifier-bypass detection rejects models whose names disagree
036 * with {@code IdentifierService.normalize(...)} — catching adapters that sneak in a
037 * third-party canonicalization step. Strict-mode escalation is the caller's
038 * responsibility (see {@code CatalogLoadOptions.strict()} and §15).</p>
039 *
040 * <p>Validation is best-effort: it never throws; every problem is reported as a
041 * {@link CatalogDiagnostic} on the returned {@link CatalogValidationResult}. ERROR
042 * diagnostics fail validation; WARN/INFO do not.</p>
043 *
044 * <p>Duplicate detection uses {@link IdentifierService#areEqual} rather than a
045 * normalize-then-{@code Set} short-circuit: in dialects where the compare rule is
046 * case-insensitive but the canonical normalize preserves the input spelling
047 * (e.g., BigQuery columns, MySQL columns under {@code lower_case_table_names=2},
048 * MSSQL with a case-insensitive collation), the keyset alone misses real
049 * duplicates. The cost is O(n²) per scope, which is acceptable since each scope
050 * is a single schema or table column list.</p>
051 */
052public final class CatalogModelValidator {
053
054    public CatalogModelValidator() {
055        // No state.
056    }
057
058    public CatalogValidationResult validate(UnifiedCatalogModel model, CatalogLoadOptions options) {
059        if (model == null) {
060            throw new IllegalArgumentException("CatalogModelValidator.validate: model is required");
061        }
062        List<CatalogDiagnostic> diagnostics = new ArrayList<CatalogDiagnostic>();
063        EDbVendor vendor = model.vendor();
064        IdentifierConfig cfg = effectiveConfig(model, options, diagnostics);
065        IdentifierService service = CatalogIdentifierPolicy.identifierServiceFor(cfg, vendor);
066        boolean normalizeOnLoad = options == null || options.normalizeOnLoad();
067
068        IdentifierBucket catalogBucket = new IdentifierBucket(service);
069        for (CatalogModel c : model.catalogs()) {
070            validateCatalog(c, vendor, service, normalizeOnLoad, catalogBucket, diagnostics);
071        }
072        return CatalogValidationResult.of(diagnostics);
073    }
074
075    // ---------- catalog / schema / table / column traversal ----------
076
077    private void validateCatalog(CatalogModel c,
078                                 EDbVendor vendor,
079                                 IdentifierService service,
080                                 boolean normalizeOnLoad,
081                                 IdentifierBucket catalogBucket,
082                                 List<CatalogDiagnostic> diagnostics) {
083        String name = c.name();
084        checkRequired("catalog", name, diagnostics);
085        checkBypass(vendor, service, normalizeOnLoad, name, ESQLDataObjectType.dotCatalog,
086            "catalog '" + name + "'", diagnostics);
087        flagIfDuplicate(catalogBucket, name, ESQLDataObjectType.dotCatalog, CatalogObjectKind.CATALOG,
088            "catalog '" + name + "'", diagnostics);
089        // Build a catalog-wide table index so FK constraints can look up their target
090        // table across schemas. Keys are normalized via IdentifierService so the lookup
091        // honors per-vendor identifier folding (Oracle uppercase, PG lowercase, etc.).
092        CatalogTableIndex tableIndex = CatalogTableIndex.build(c, service);
093        IdentifierBucket schemaBucket = new IdentifierBucket(service);
094        for (SchemaModel s : c.schemas()) {
095            validateSchema(s, vendor, service, normalizeOnLoad, c.name(), schemaBucket,
096                tableIndex, diagnostics);
097        }
098    }
099
100    private void validateSchema(SchemaModel s,
101                                EDbVendor vendor,
102                                IdentifierService service,
103                                boolean normalizeOnLoad,
104                                String catalogName,
105                                IdentifierBucket schemaBucket,
106                                CatalogTableIndex tableIndex,
107                                List<CatalogDiagnostic> diagnostics) {
108        String name = s.name();
109        if (name == null) {
110            diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_MISSING_DEFAULT,
111                "schema in catalog '" + catalogName + "' has null name"));
112            return;
113        }
114        if (name.isEmpty()) {
115            // Empty schema names are only legal for dialects without a schema layer
116            // (per TSQLEnv.supportSchema): MySQL, Teradata, Hive, Impala. For every
117            // other dialect — Oracle, PostgreSQL, MSSQL, BigQuery, etc. — an empty
118            // schema name silently re-qualifies child objects as catalog.object,
119            // which is a manifest bug worth surfacing.
120            if (TSQLEnv.supportSchema(vendor)) {
121                diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_MISSING_DEFAULT,
122                    "schema in catalog '" + catalogName
123                        + "' has empty name; vendor " + vendor + " requires a schema layer"));
124            }
125        } else {
126            checkBypass(vendor, service, normalizeOnLoad, name, ESQLDataObjectType.dotSchema,
127                "schema '" + catalogName + "." + name + "'", diagnostics);
128        }
129        flagIfDuplicate(schemaBucket, name, ESQLDataObjectType.dotSchema, CatalogObjectKind.SCHEMA,
130            "schema '" + catalogName + "." + name + "'", diagnostics);
131
132        // The existing catalog storage (TSQLCatalog) keeps table / view / routine /
133        // synonym / sequence indexes type-specific, so a schema may legitimately hold
134        // a table named 'Foo' and a function named 'Foo' that resolve by requested
135        // type. We honor that by using one bucket per object-type namespace. Tables
136        // and views share a bucket because in every dialect we cover (Oracle, MSSQL,
137        // PostgreSQL, MySQL, BigQuery) a CREATE TABLE and a CREATE VIEW with the same
138        // unqualified name in the same schema is a real collision.
139        IdentifierBucket tableViewBucket = new IdentifierBucket(service);
140        for (TableModel t : s.tables()) {
141            validateTable(t, vendor, service, normalizeOnLoad, catalogName, name,
142                tableViewBucket, tableIndex, diagnostics);
143        }
144        for (ViewModel v : s.views()) {
145            validateView(v, vendor, service, normalizeOnLoad, catalogName, name,
146                tableViewBucket, diagnostics);
147        }
148        IdentifierBucket routineBucket = new IdentifierBucket(service);
149        for (RoutineModel r : s.routines()) {
150            validateRoutine(r, vendor, service, normalizeOnLoad, catalogName, name,
151                routineBucket, diagnostics);
152        }
153        IdentifierBucket synonymBucket = new IdentifierBucket(service);
154        for (SynonymModel sy : s.synonyms()) {
155            validateSynonym(sy, vendor, service, normalizeOnLoad, catalogName, name,
156                synonymBucket, diagnostics);
157        }
158        IdentifierBucket sequenceBucket = new IdentifierBucket(service);
159        for (SequenceModel sq : s.sequences()) {
160            validateSequence(sq, vendor, service, normalizeOnLoad, catalogName, name,
161                sequenceBucket, diagnostics);
162        }
163    }
164
165    private void validateTable(TableModel t,
166                               EDbVendor vendor,
167                               IdentifierService service,
168                               boolean normalizeOnLoad,
169                               String catalogName,
170                               String schemaName,
171                               IdentifierBucket objectBucket,
172                               CatalogTableIndex tableIndex,
173                               List<CatalogDiagnostic> diagnostics) {
174        String name = t.name();
175        String location = qualified(catalogName, schemaName, name);
176        checkBypass(vendor, service, normalizeOnLoad, name, ESQLDataObjectType.dotTable,
177            "table '" + location + "'", diagnostics);
178        flagIfDuplicate(objectBucket, name, ESQLDataObjectType.dotTable, CatalogObjectKind.TABLE,
179            "table '" + location + "'", diagnostics);
180
181        IdentifierBucket columnBucket = new IdentifierBucket(service);
182        for (ColumnModel c : t.columns()) {
183            validateColumn(c, vendor, service, normalizeOnLoad, location, columnBucket, diagnostics);
184        }
185
186        IdentifierBucket constraintBucket = new IdentifierBucket(service);
187        for (ConstraintModel cs : t.constraints()) {
188            String csName = cs.name();
189            if (csName != null && !csName.isEmpty()) {
190                flagIfDuplicate(constraintBucket, csName, ESQLDataObjectType.dotUnknown,
191                    CatalogObjectKind.CONSTRAINT,
192                    "constraint '" + csName + "' on table '" + location + "'", diagnostics);
193            }
194            for (String col : cs.columns()) {
195                if (!columnBucket.contains(col, ESQLDataObjectType.dotColumn)) {
196                    diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_DUPLICATE_NAME,
197                        "constraint '" + (csName == null ? "<unnamed>" : csName)
198                            + "' on table '" + location + "' references unknown column '" + col + "'"));
199                }
200            }
201            if (isForeignKeyType(cs.type())) {
202                validateForeignKey(cs, location, schemaName, vendor, service, tableIndex, diagnostics);
203            }
204        }
205
206        IdentifierBucket indexBucket = new IdentifierBucket(service);
207        for (IndexModel ix : t.indexes()) {
208            flagIfDuplicate(indexBucket, ix.name(), ESQLDataObjectType.dotUnknown,
209                CatalogObjectKind.INDEX,
210                "index '" + ix.name() + "' on table '" + location + "'", diagnostics);
211            for (String col : ix.columns()) {
212                if (!columnBucket.contains(col, ESQLDataObjectType.dotColumn)) {
213                    diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_DUPLICATE_NAME,
214                        "index '" + ix.name() + "' on table '" + location
215                            + "' references unknown column '" + col + "'"));
216                }
217            }
218        }
219    }
220
221    private void validateView(ViewModel v,
222                              EDbVendor vendor,
223                              IdentifierService service,
224                              boolean normalizeOnLoad,
225                              String catalogName,
226                              String schemaName,
227                              IdentifierBucket objectBucket,
228                              List<CatalogDiagnostic> diagnostics) {
229        String name = v.name();
230        String location = qualified(catalogName, schemaName, name);
231        checkBypass(vendor, service, normalizeOnLoad, name, ESQLDataObjectType.dotTable,
232            "view '" + location + "'", diagnostics);
233        flagIfDuplicate(objectBucket, name, ESQLDataObjectType.dotTable,
234            v.materialized() ? CatalogObjectKind.MATERIALIZED_VIEW : CatalogObjectKind.VIEW,
235            "view '" + location + "'", diagnostics);
236        IdentifierBucket columnBucket = new IdentifierBucket(service);
237        for (ColumnModel c : v.columns()) {
238            validateColumn(c, vendor, service, normalizeOnLoad, location, columnBucket, diagnostics);
239        }
240    }
241
242    private void validateRoutine(RoutineModel r,
243                                 EDbVendor vendor,
244                                 IdentifierService service,
245                                 boolean normalizeOnLoad,
246                                 String catalogName,
247                                 String schemaName,
248                                 IdentifierBucket objectBucket,
249                                 List<CatalogDiagnostic> diagnostics) {
250        String name = r.name();
251        String location = qualified(catalogName, schemaName, name);
252        // For diagnostic-message accuracy use the kind-specific leaf type when running
253        // the bypass check; for duplicate detection always compare with dotFunction
254        // because IdentifierProfile only routes dotFunction through ROUTINE_GROUP —
255        // dotProcedure / dotOraclePackage / dotRoutine fall back to NAME_GROUP. Using
256        // dotFunction here means MySQL procedures and packages share the routine
257        // case-insensitivity rule, which matches the actual MySQL behavior (stored
258        // routine names are case-insensitive regardless of routine kind).
259        ESQLDataObjectType bypassType = leafTypeFor(r.kind());
260        ESQLDataObjectType compareType = ESQLDataObjectType.dotFunction;
261        String label = labelFor(r.kind());
262        checkBypass(vendor, service, normalizeOnLoad, name, bypassType,
263            label + " '" + location + "'", diagnostics);
264        flagIfDuplicate(objectBucket, name, compareType, r.kind(),
265            label + " '" + location + "'", diagnostics);
266    }
267
268    private void validateSynonym(SynonymModel sy,
269                                 EDbVendor vendor,
270                                 IdentifierService service,
271                                 boolean normalizeOnLoad,
272                                 String catalogName,
273                                 String schemaName,
274                                 IdentifierBucket objectBucket,
275                                 List<CatalogDiagnostic> diagnostics) {
276        String name = sy.name();
277        String location = qualified(catalogName, schemaName, name);
278        checkBypass(vendor, service, normalizeOnLoad, name, ESQLDataObjectType.dotSynonyms,
279            "synonym '" + location + "'", diagnostics);
280        flagIfDuplicate(objectBucket, name, ESQLDataObjectType.dotSynonyms, CatalogObjectKind.SYNONYM,
281            "synonym '" + location + "'", diagnostics);
282    }
283
284    private void validateSequence(SequenceModel sq,
285                                  EDbVendor vendor,
286                                  IdentifierService service,
287                                  boolean normalizeOnLoad,
288                                  String catalogName,
289                                  String schemaName,
290                                  IdentifierBucket objectBucket,
291                                  List<CatalogDiagnostic> diagnostics) {
292        String name = sq.name();
293        String location = qualified(catalogName, schemaName, name);
294        checkBypass(vendor, service, normalizeOnLoad, name, ESQLDataObjectType.dotUnknown,
295            "sequence '" + location + "'", diagnostics);
296        flagIfDuplicate(objectBucket, name, ESQLDataObjectType.dotUnknown, CatalogObjectKind.SEQUENCE,
297            "sequence '" + location + "'", diagnostics);
298    }
299
300    private void validateColumn(ColumnModel c,
301                                EDbVendor vendor,
302                                IdentifierService service,
303                                boolean normalizeOnLoad,
304                                String parentLocation,
305                                IdentifierBucket columnBucket,
306                                List<CatalogDiagnostic> diagnostics) {
307        String name = c.name();
308        checkBypass(vendor, service, normalizeOnLoad, name, ESQLDataObjectType.dotColumn,
309            "column '" + parentLocation + "." + name + "'", diagnostics);
310        flagIfDuplicate(columnBucket, name, ESQLDataObjectType.dotColumn, CatalogObjectKind.COLUMN,
311            "column '" + parentLocation + "." + name + "'", diagnostics);
312    }
313
314    // ---------- helpers ----------
315
316    private void checkRequired(String label, String value, List<CatalogDiagnostic> diagnostics) {
317        if (value == null || value.isEmpty()) {
318            diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_MISSING_DEFAULT,
319                label + ".name is required"));
320        }
321    }
322
323    /**
324     * Identifier-bypass detection. When {@code normalizeOnLoad} is true, the raw name
325     * must already equal {@code IdentifierService.normalize(name)}; otherwise an adapter
326     * either pre-normalized through a non-{@code IdentifierService} path or shipped a
327     * raw form that the resolver will fail to match. We emit a WARN, not an ERROR, so
328     * downstream code remains tolerant — strict-mode callers escalate at the load
329     * entry point.
330     *
331     * <p>For COLLATION_BASED dialects (MSSQL/Azure SQL) where {@code normalize} preserves
332     * the input spelling, this check is a no-op: the resolver will route compares
333     * through {@code IdentifierService.areEqual} regardless of stored case.</p>
334     */
335    private void checkBypass(EDbVendor vendor,
336                             IdentifierService service,
337                             boolean normalizeOnLoad,
338                             String name,
339                             ESQLDataObjectType type,
340                             String location,
341                             List<CatalogDiagnostic> diagnostics) {
342        if (!normalizeOnLoad || name == null || name.isEmpty()) {
343            return;
344        }
345        String normalized = service.normalize(name, type);
346        if (normalized == null || normalized.equals(name)) {
347            return;
348        }
349        // For COLLATION_BASED dialects (MSSQL/Azure SQL with case-insensitive collation,
350        // MySQL lower_case_table_names=2) IdentifierService.normalize preserves the input
351        // spelling, so we never reach this point for them — no false positives. The
352        // condition above (normalized != name) is enough to identify a bypass for the
353        // case-folding dialects (Oracle/DB2 → UPPER, Postgres/Greenplum/Redshift → lower)
354        // we care about.
355        diagnostics.add(CatalogDiagnostic.builder()
356            .severity(CatalogDiagnosticSeverity.WARN)
357            .code(CatalogDiagnosticCode.CATALOG_VALIDATION_IDENTIFIER_BYPASS)
358            .message(location + ": raw name '" + name + "' is not in canonical form for "
359                + vendor + " (expected '" + normalized + "'). The reader bypassed "
360                + "IdentifierService normalization or normalizeOnLoad must be disabled.")
361            .repairHint("Run input through CatalogIdentifierPolicy.normalize / "
362                + "IdentifierService.normalize before constructing the model, or set "
363                + "CatalogLoadOptions.normalizeOnLoad(false) when the source intentionally "
364                + "preserves a non-canonical form.")
365            .build());
366    }
367
368    private void flagIfDuplicate(IdentifierBucket bucket,
369                                 String name,
370                                 ESQLDataObjectType type,
371                                 CatalogObjectKind kind,
372                                 String location,
373                                 List<CatalogDiagnostic> diagnostics) {
374        if (name == null) return;
375        if (!bucket.add(name, type)) {
376            diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_DUPLICATE_NAME,
377                "duplicate " + labelFor(kind) + ": " + location));
378        }
379    }
380
381    private static CatalogDiagnostic error(CatalogDiagnosticCode code, String message) {
382        return CatalogDiagnostic.builder()
383            .severity(CatalogDiagnosticSeverity.ERROR)
384            .code(code)
385            .message(message)
386            .build();
387    }
388
389    private static String qualified(String catalog, String schema, String name) {
390        StringBuilder sb = new StringBuilder();
391        if (catalog != null && !catalog.isEmpty()) sb.append(catalog).append('.');
392        if (schema != null && !schema.isEmpty()) sb.append(schema).append('.');
393        sb.append(name);
394        return sb.toString();
395    }
396
397    /**
398     * Choose the {@link IdentifierConfig} the validator should compare against.
399     *
400     * <p>Plan §9.2: the model carries its own config (frequently the vendor default,
401     * sometimes adapter-supplied to model MySQL {@code lower_case_table_names} or
402     * MSSQL collation). The caller may pass a {@link CatalogLoadOptions} with an
403     * explicit override — in that case the override wins, after a vendor-consistency
404     * check. When the caller did not pass an explicit override, the model's config is
405     * authoritative.</p>
406     */
407    private static IdentifierConfig effectiveConfig(UnifiedCatalogModel model,
408                                                    CatalogLoadOptions options,
409                                                    List<CatalogDiagnostic> diagnostics) {
410        IdentifierConfig modelCfg = model.identifierConfig();
411        // Always check the model's own identifierConfig vendor matches the model vendor —
412        // this is independent of the options path and catches a manifest that declared a
413        // mismatched IdentifierConfig at the top level (rare but possible when readers
414        // forward a config supplied by a different layer of code).
415        if (modelCfg != null && modelCfg.vendor() != model.vendor()) {
416            diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_IDENTIFIER_BYPASS,
417                "UnifiedCatalogModel.identifierConfig.vendor=" + modelCfg.vendor()
418                    + " does not match model.vendor=" + model.vendor()));
419        }
420        if (options == null) {
421            return modelCfg;
422        }
423        // Always check options.vendor against model.vendor — a downstream loader / runtime
424        // is going to be driven by options.vendor (CatalogQuery, CatalogLoaders), so a
425        // mismatch is a real configuration bug regardless of whether identifierConfig was
426        // explicit.
427        if (options.vendor() != model.vendor()) {
428            diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_IDENTIFIER_BYPASS,
429                "CatalogLoadOptions.vendor=" + options.vendor()
430                    + " does not match model.vendor=" + model.vendor()));
431        }
432        if (!options.hasExplicitIdentifierConfig()) {
433            return modelCfg;
434        }
435        IdentifierConfig optsCfg = options.identifierConfig();
436        if (optsCfg.vendor() != model.vendor()) {
437            diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_IDENTIFIER_BYPASS,
438                "CatalogLoadOptions.identifierConfig.vendor=" + optsCfg.vendor()
439                    + " does not match model.vendor=" + model.vendor()));
440            return modelCfg;
441        }
442        return optsCfg;
443    }
444
445    private static ESQLDataObjectType leafTypeFor(CatalogObjectKind kind) {
446        switch (kind) {
447            case FUNCTION:  return ESQLDataObjectType.dotFunction;
448            case PROCEDURE: return ESQLDataObjectType.dotProcedure;
449            case PACKAGE:   return ESQLDataObjectType.dotOraclePackage;
450            case ROUTINE:   return ESQLDataObjectType.dotRoutine;
451            default:        return ESQLDataObjectType.dotUnknown;
452        }
453    }
454
455    /**
456     * Diagnostic-friendly lower-case label for a kind. Hand-coded to avoid
457     * {@link String#toLowerCase()}, which the forbidden-apis plugin bans inside
458     * {@code catalog/**} (plan §9.5).
459     */
460    private static String labelFor(CatalogObjectKind kind) {
461        switch (kind) {
462            case CATALOG:           return "catalog";
463            case SCHEMA:            return "schema";
464            case TABLE:             return "table";
465            case VIEW:              return "view";
466            case MATERIALIZED_VIEW: return "materialized view";
467            case COLUMN:            return "column";
468            case ROUTINE:           return "routine";
469            case FUNCTION:          return "function";
470            case PROCEDURE:         return "procedure";
471            case PACKAGE:           return "package";
472            case SYNONYM:           return "synonym";
473            case SEQUENCE:          return "sequence";
474            case TYPE:              return "type";
475            case TRIGGER:           return "trigger";
476            case INDEX:             return "index";
477            case CONSTRAINT:        return "constraint";
478            default:                return "object";
479        }
480    }
481
482    /**
483     * Per-scope identifier deduplication bucket. Calls {@link IdentifierService#areEqual}
484     * for every membership test so dialects whose compare rule is case-insensitive but
485     * whose {@code normalize} preserves spelling (BigQuery columns, MSSQL with case-
486     * insensitive collation, MySQL {@code lower_case_table_names=2}) still catch
487     * collisions that a {@code Set<String>} of normalize keys would miss.
488     *
489     * <p>The {@link ESQLDataObjectType} used for comparison is supplied per-{@link #add(String, ESQLDataObjectType)}
490     * call rather than fixed at construction. This matters when one logical namespace
491     * mixes kinds that have different per-vendor case-sensitivity rules — e.g., MySQL's
492     * default schema namespace where tables compare case-sensitively
493     * ({@link ESQLDataObjectType#dotTable}) but routines compare case-insensitively
494     * ({@link ESQLDataObjectType#dotRoutine}/{@code dotFunction}/{@code dotProcedure}).
495     * Each entry is checked against existing entries using the type registered on
496     * insert; duplicate detection is symmetric (existing → new and new → existing).</p>
497     */
498    static final class IdentifierBucket {
499        private final IdentifierService service;
500        private final List<String> seenNames = new ArrayList<String>();
501        private final List<ESQLDataObjectType> seenTypes = new ArrayList<ESQLDataObjectType>();
502
503        IdentifierBucket(IdentifierService service) {
504            this.service = service;
505        }
506
507        /** Add a name with its kind-specific compare type. */
508        boolean add(String raw, ESQLDataObjectType type) {
509            if (raw == null) return true;
510            for (int i = 0; i < seenNames.size(); i++) {
511                String existing = seenNames.get(i);
512                ESQLDataObjectType existingType = seenTypes.get(i);
513                // Cross-kind compare uses the more permissive (incoming) type so the bucket
514                // catches "table Fn vs function fn" collisions even when only one of the
515                // two compare rules is case-insensitive.
516                if (service.areEqual(existing, raw, type)
517                    || service.areEqual(existing, raw, existingType)) {
518                    return false;
519                }
520            }
521            seenNames.add(raw);
522            seenTypes.add(type);
523            return true;
524        }
525
526        /**
527         * Membership test against entries already added with their per-call type.
528         * Used by constraint/index references against column names — the caller passes
529         * {@link ESQLDataObjectType#dotColumn} (which is what those entries were added
530         * with anyway, but the API stays explicit).
531         */
532        boolean contains(String raw, ESQLDataObjectType type) {
533            if (raw == null) return false;
534            for (int i = 0; i < seenNames.size(); i++) {
535                if (service.areEqual(seenNames.get(i), raw, type)
536                    || service.areEqual(seenNames.get(i), raw, seenTypes.get(i))) {
537                    return true;
538                }
539            }
540            return false;
541        }
542    }
543
544    // ---------- FK validation ----------
545
546    /**
547     * Lenient FK type recognition. Matches {@code FK}, {@code FOREIGN KEY},
548     * {@code FOREIGN_KEY} case-insensitively against the constraint's
549     * {@link ConstraintModel#type()} string. Hand-coded to avoid
550     * {@link String#equalsIgnoreCase} which the forbidden-apis plugin bans
551     * inside {@code catalog/**}.
552     */
553    private static boolean isForeignKeyType(String type) {
554        if (type == null) return false;
555        return asciiEqualsIgnoreCase(type, "FK")
556            || asciiEqualsIgnoreCase(type, "FOREIGN KEY")
557            || asciiEqualsIgnoreCase(type, "FOREIGN_KEY");
558    }
559
560    private static boolean asciiEqualsIgnoreCase(String a, String b) {
561        if (a == null || b == null) return a == b;
562        int len = a.length();
563        if (b.length() != len) return false;
564        for (int i = 0; i < len; i++) {
565            char ca = a.charAt(i);
566            char cb = b.charAt(i);
567            if (ca == cb) continue;
568            if (ca >= 'A' && ca <= 'Z') ca = (char) (ca + 32);
569            if (cb >= 'A' && cb <= 'Z') cb = (char) (cb + 32);
570            if (ca != cb) return false;
571        }
572        return true;
573    }
574
575    private void validateForeignKey(ConstraintModel cs,
576                                    String fkLocation,
577                                    String currentSchema,
578                                    EDbVendor vendor,
579                                    IdentifierService service,
580                                    CatalogTableIndex tableIndex,
581                                    List<CatalogDiagnostic> diagnostics) {
582        String csLabel = "constraint '" + (cs.name() == null ? "<unnamed>" : cs.name())
583            + "' on table '" + fkLocation + "'";
584        String referencedTable = cs.referencedTable();
585        if (referencedTable == null || referencedTable.isEmpty()) {
586            diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_INVALID_REFERENCE,
587                csLabel + " is a FOREIGN KEY but has no referencedTable"));
588            return;
589        }
590        List<String> refCols = cs.referencedColumns();
591        if (refCols.size() != cs.columns().size()) {
592            diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_INVALID_REFERENCE,
593                csLabel + " has " + cs.columns().size() + " column(s) but "
594                    + refCols.size() + " referencedColumn(s); FK column counts must match"));
595            // Continue to surface other issues (target table resolution) — don't bail.
596        }
597
598        CatalogTableIndex.Lookup resolved = tableIndex.resolve(referencedTable, currentSchema);
599        switch (resolved.kind) {
600            case NOT_FOUND:
601                diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_INVALID_REFERENCE,
602                    csLabel + " references unknown table '" + referencedTable + "'"));
603                return;
604            case AMBIGUOUS:
605                diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_INVALID_REFERENCE,
606                    csLabel + " references ambiguous bare table name '" + referencedTable
607                        + "'; qualify with schema"));
608                return;
609            case FOUND:
610            default:
611                break;
612        }
613
614        // Verify each referencedColumn exists on the target table.
615        TableModel target = resolved.table;
616        for (String col : refCols) {
617            if (!columnExists(target, col, service)) {
618                diagnostics.add(error(CatalogDiagnosticCode.CATALOG_VALIDATION_INVALID_REFERENCE,
619                    csLabel + " references unknown column '" + col
620                        + "' on referenced table '" + referencedTable + "'"));
621            }
622        }
623    }
624
625    private static boolean columnExists(TableModel t, String columnName, IdentifierService service) {
626        if (columnName == null) return false;
627        for (ColumnModel c : t.columns()) {
628            if (service.areEqual(c.name(), columnName, ESQLDataObjectType.dotColumn)) {
629                return true;
630            }
631        }
632        return false;
633    }
634
635    /**
636     * Per-catalog table lookup index, built once at {@link #validateCatalog} entry and
637     * threaded down so each FK constraint can resolve its {@code referencedTable}
638     * against the whole catalog (not just the current schema).
639     *
640     * <p>Keys are normalized via {@link IdentifierService#normalize} so the lookup
641     * honors the vendor's identifier-folding rule (Oracle uppercase, PG lowercase,
642     * etc.). Three key shapes are stored:</p>
643     * <ul>
644     *   <li><b>Catalog-qualified</b> ({@code "catalog.schema.table"}) — populated
645     *       for every table. Lets an FK target an explicit
646     *       {@code ConstraintModel#referencedTable} of the form
647     *       {@code "ORCL.HR.DEPT"} (the form the Javadoc documents).</li>
648     *   <li><b>Schema-qualified</b> ({@code "schema.table"}) — populated for every
649     *       table. The primary lookup for FK references of the form
650     *       {@code "schema.table"}.</li>
651     *   <li><b>Bare</b> ({@code "table"}) — fallback when the FK's
652     *       {@code referencedTable} has no dot. If a bare name appears in 2+
653     *       schemas, it is marked ambiguous and resolution then requires the
654     *       caller to qualify.</li>
655     * </ul>
656     *
657     * <p><b>Resolution order for bare {@code referencedTable}:</b></p>
658     * <ol>
659     *   <li>First try {@code currentSchema + "." + ref} — current-schema-first
660     *       precedence, matching CREATE TABLE / FK semantics in real databases.
661     *       A local same-named table wins over a same-named table in another
662     *       schema even when the bare name is ambiguous catalog-wide.</li>
663     *   <li>Otherwise fall back to a catalog-wide bare lookup; ambiguity is only
664     *       reported when the bare name doesn't resolve in the current schema.</li>
665     * </ol>
666     */
667    private static final class CatalogTableIndex {
668
669        enum Kind { FOUND, NOT_FOUND, AMBIGUOUS }
670
671        static final class Lookup {
672            final Kind kind;
673            final TableModel table;
674            Lookup(Kind k, TableModel t) { this.kind = k; this.table = t; }
675        }
676
677        private final IdentifierService service;
678        // Catalog-qualified ("catalog.schema.table") and schema-qualified ("schema.table") keys
679        // share this map. Both forms are pre-normalized once at build time.
680        private final Map<String, TableModel> byQualifiedKey;
681        private final Map<String, TableModel> byBareKey;
682        private final Map<String, Boolean> bareNameAmbiguous;
683
684        private CatalogTableIndex(IdentifierService service) {
685            this.service = service;
686            this.byQualifiedKey = new LinkedHashMap<String, TableModel>();
687            this.byBareKey = new HashMap<String, TableModel>();
688            this.bareNameAmbiguous = new HashMap<String, Boolean>();
689        }
690
691        static CatalogTableIndex build(CatalogModel c, IdentifierService service) {
692            CatalogTableIndex idx = new CatalogTableIndex(service);
693            String catalogName = c.name();
694            for (SchemaModel s : c.schemas()) {
695                String schemaName = s.name();
696                for (TableModel t : s.tables()) {
697                    String bare = t.name();
698                    if (bare == null || bare.isEmpty()) continue;
699                    if (schemaName != null && !schemaName.isEmpty()) {
700                        String schemaQual = service.normalize(
701                            schemaName + "." + bare, ESQLDataObjectType.dotTable);
702                        idx.byQualifiedKey.put(schemaQual, t);
703                        if (catalogName != null && !catalogName.isEmpty()) {
704                            String fullyQual = service.normalize(
705                                catalogName + "." + schemaName + "." + bare,
706                                ESQLDataObjectType.dotTable);
707                            idx.byQualifiedKey.put(fullyQual, t);
708                        }
709                    }
710                    String bareKey = service.normalize(bare, ESQLDataObjectType.dotTable);
711                    if (idx.byBareKey.containsKey(bareKey)) {
712                        idx.bareNameAmbiguous.put(bareKey, Boolean.TRUE);
713                    } else {
714                        idx.byBareKey.put(bareKey, t);
715                    }
716                }
717            }
718            return idx;
719        }
720
721        Lookup resolve(String referencedTable, String currentSchema) {
722            if (referencedTable == null || referencedTable.isEmpty()) {
723                return new Lookup(Kind.NOT_FOUND, null);
724            }
725            // Case 1: ref has a dot — treat as qualified. Tries both shapes the index
726            // stores (catalog.schema.table and schema.table) without further parsing.
727            if (referencedTable.indexOf('.') >= 0) {
728                String qual = service.normalize(referencedTable, ESQLDataObjectType.dotTable);
729                TableModel hit = byQualifiedKey.get(qual);
730                if (hit != null) return new Lookup(Kind.FOUND, hit);
731                return new Lookup(Kind.NOT_FOUND, null);
732            }
733            // Case 2: ref is bare. Try current schema first (current-schema-first
734            // precedence — matches FK / CREATE TABLE semantics in real databases).
735            if (currentSchema != null && !currentSchema.isEmpty()) {
736                String qual = service.normalize(
737                    currentSchema + "." + referencedTable, ESQLDataObjectType.dotTable);
738                TableModel hit = byQualifiedKey.get(qual);
739                if (hit != null) return new Lookup(Kind.FOUND, hit);
740            }
741            // Case 3: ref is bare and not in current schema. Fall back to catalog-wide
742            // bare lookup; ambiguity gate fires only when the local-schema shortcut missed.
743            String bareKey = service.normalize(referencedTable, ESQLDataObjectType.dotTable);
744            if (Boolean.TRUE.equals(bareNameAmbiguous.get(bareKey))) {
745                return new Lookup(Kind.AMBIGUOUS, null);
746            }
747            TableModel hit = byBareKey.get(bareKey);
748            if (hit != null) return new Lookup(Kind.FOUND, hit);
749            return new Lookup(Kind.NOT_FOUND, null);
750        }
751    }
752}