001package gudusoft.gsqlparser.catalog.input.readers;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.catalog.input.CatalogInputException;
005import gudusoft.gsqlparser.catalog.input.CatalogInputKind;
006import gudusoft.gsqlparser.catalog.input.CatalogInputReader;
007import gudusoft.gsqlparser.catalog.input.CatalogInputReaderFactory;
008import gudusoft.gsqlparser.catalog.input.CatalogInputSource;
009import gudusoft.gsqlparser.catalog.input.CatalogLoadOptions;
010import gudusoft.gsqlparser.catalog.input.model.CatalogModel;
011import gudusoft.gsqlparser.catalog.input.model.CatalogSourceInfo;
012import gudusoft.gsqlparser.catalog.input.model.ColumnModel;
013import gudusoft.gsqlparser.catalog.input.model.DefaultsConfig;
014import gudusoft.gsqlparser.catalog.input.model.SchemaModel;
015import gudusoft.gsqlparser.catalog.input.model.TableModel;
016import gudusoft.gsqlparser.catalog.input.model.UnifiedCatalogModel;
017import gudusoft.gsqlparser.catalog.input.model.ViewModel;
018import gudusoft.gsqlparser.util.json.JSON;
019
020import java.io.BufferedReader;
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.InputStreamReader;
024import java.io.Reader;
025import java.nio.charset.StandardCharsets;
026import java.nio.file.Files;
027import java.util.LinkedHashMap;
028import java.util.List;
029import java.util.Map;
030
031/**
032 * Reader for {@link CatalogInputKind#SQLDEP_JSON} sources — the JSON shape the legacy
033 * {@code SQLDepSQLEnv} consumes (plan §11 T2.A.4 / first Phase 2.A static-file adapter).
034 *
035 * <p>Per plan §6 / §11.2 this reader funnels the SQLDep export shape into the same
036 * {@link UnifiedCatalogModel} the JSON-manifest reader uses; the eager bridge then
037 * materializes a {@code TSQLEnv} byte-for-byte equivalent to what the legacy
038 * {@code SQLDepSQLEnv} would build (parity test in {@code SqldepLegacyParityTest}).
039 * The legacy path remains untouched per plan §8.5.</p>
040 *
041 * <p>SQLDep export schema (key fields):</p>
042 * <pre>{@code
043 * {
044 *   "createdBy": "sqldep ...",            // optional but conventional; not required
045 *   "databaseModel": { ... },             // optional outer wrapper
046 *   "databases": [
047 *     {
048 *       "name": "ORCL",
049 *       "tables": [
050 *         {
051 *           "schema": "HR",                  // per-table schema label
052 *           "name": "EMPLOYEES",
053 *           "isView": "false",               // optional, string boolean
054 *           "columns": [{"name": "..."}, ...]
055 *         }
056 *       ]
057 *     }
058 *   ]
059 * }
060 * }</pre>
061 *
062 * <p>The reader is dispatch-by-{@link CatalogInputKind#SQLDEP_JSON} (callers tag the
063 * source explicitly). Auto-claim by {@code createdBy} content marker is intentionally
064 * not implemented here — that dispatch already lives in the legacy
065 * {@code TJSONSQLEnvParser} and reproducing it inside a {@code supports()} pre-scan
066 * would force every SQLDep candidate source to be drained twice.</p>
067 *
068 * <p>Identifier names with embedded dots are wrapped with the vendor's quote
069 * character before storage. Without the wrap, {@code ModelBackedCatalogProvider}
070 * later concatenates segments with {@code .} into qualified strings (e.g.
071 * {@code catalog.schema.A.B}) that {@code CatalogIdentifierPolicy.parse} would
072 * split into four segments instead of three, and the runtime would never resolve
073 * the SQL reference {@code "A.B"}. The legacy {@code SQLDepSQLEnv} carries the
074 * same defense — the delimiter chars come from {@code TSQLEnv.delimitedChar} —
075 * and we duplicate that table here so {@code catalog/**} stays free of a
076 * compile-time dependency on the {@code sqlenv} runtime layer.
077 * {@code CatalogModelValidator} still surfaces an identifier-bypass WARN when
078 * the produced names disagree with {@code IdentifierService.normalize}, exactly
079 * as for the JSON-manifest reader.</p>
080 */
081public final class SqldepCatalogInputReader implements CatalogInputReader {
082
083    public SqldepCatalogInputReader() {
084    }
085
086    @Override
087    public CatalogInputKind kind() {
088        return CatalogInputKind.SQLDEP_JSON;
089    }
090
091    @Override
092    public boolean supports(CatalogInputSource source, CatalogLoadOptions options) {
093        if (source == null || source.inMemoryModel() != null) {
094            return false;
095        }
096        return source.declaredKind() == CatalogInputKind.SQLDEP_JSON;
097    }
098
099    @Override
100    public UnifiedCatalogModel read(CatalogInputSource source, CatalogLoadOptions options)
101            throws CatalogInputException {
102        if (source == null) {
103            throw new CatalogInputException("SqldepCatalogInputReader: source is required");
104        }
105        final EDbVendor vendor = (options != null && options.vendor() != null)
106            ? options.vendor()
107            : EDbVendor.dbvoracle;  // SQLDep exports historically default to Oracle
108
109        long start = System.currentTimeMillis();
110        String text = readAll(source);
111        Object parsed;
112        try {
113            parsed = JSON.parseObject(text);
114        } catch (RuntimeException ex) {
115            throw new CatalogInputException(
116                "Failed to parse SQLDep JSON from " + source.name() + ": " + ex.getMessage(),
117                ex);
118        }
119        if (!(parsed instanceof Map)) {
120            throw new CatalogInputException(
121                "SQLDep root must be an object (got " + typeOf(parsed) + ")");
122        }
123        @SuppressWarnings("unchecked")
124        Map<Object, Object> root = (Map<Object, Object>) parsed;
125
126        // Optional outer wrapper — mirror SQLDepSQLEnv.initSQLEnv().
127        Map<Object, Object> body = root;
128        Object dm = root.get("databaseModel");
129        if (dm instanceof Map) {
130            @SuppressWarnings("unchecked")
131            Map<Object, Object> dmMap = (Map<Object, Object>) dm;
132            body = dmMap;
133        } else if (dm != null) {
134            throw new CatalogInputException(
135                "SQLDep field 'databaseModel' must be an object (got " + typeOf(dm) + ")");
136        }
137
138        try {
139            UnifiedCatalogModel.Builder mb = UnifiedCatalogModel.builder().vendor(vendor);
140            applyOptionDefaults(mb, options);
141            mb.sourceInfo(buildSourceInfo(source, start));
142
143            Object databases = body.get("databases");
144            if (databases == null) {
145                // Empty SQLDep export — return a vendor-only model. Validator gives a
146                // WARN downstream if defaults are missing; structural emptiness here is
147                // legal (and matches SQLDepSQLEnv's silent no-op when "databases" is null).
148                return mb.build();
149            }
150            if (!(databases instanceof List)) {
151                throw new CatalogInputException(
152                    "SQLDep field 'databases' must be an array (got " + typeOf(databases) + ")");
153            }
154            for (Object dobj : (List<?>) databases) {
155                if (!(dobj instanceof Map)) {
156                    throw new CatalogInputException(
157                        "SQLDep databases[i] must be an object (got " + typeOf(dobj) + ")");
158                }
159                @SuppressWarnings("unchecked")
160                Map<Object, Object> dmap = (Map<Object, Object>) dobj;
161                CatalogModel catalog = parseDatabase(dmap, vendor);
162                if (catalog != null) {
163                    mb.addCatalog(catalog);
164                }
165            }
166            return mb.build();
167        } catch (IllegalArgumentException ex) {
168            // Model builders enforce structural invariants (non-empty names, etc.) by
169            // throwing IllegalArgumentException. Surface those through the reader's
170            // checked-exception channel so callers using try/catch on
171            // CatalogInputException don't get caught off guard.
172            throw new CatalogInputException(
173                "Malformed SQLDep export from " + source.name() + ": " + ex.getMessage(), ex);
174        }
175    }
176
177    // ---------- catalog / schema / table parsing ----------
178
179    /**
180     * Walk one {@code databases[i]} entry into a {@link CatalogModel}. SQLDep stores the
181     * schema name on each table, so we group tables by their {@code schema} field before
182     * building {@link SchemaModel}s — preserves insertion order so the round-trip with
183     * {@code SQLEnvCatalogLoader} carries the same ordering as the legacy loader.
184     *
185     * <p>Catalog / schema / table names are wrapped with the vendor's quote char when
186     * they contain a dot — see {@link #wrapDottedName} for the rationale.</p>
187     */
188    private CatalogModel parseDatabase(Map<Object, Object> dmap, EDbVendor vendor)
189            throws CatalogInputException {
190        String dbName = wrapDottedName(asString(dmap, "name", true), vendor);
191        Object tables = dmap.get("tables");
192        Map<String, SchemaGroup> groups = new LinkedHashMap<String, SchemaGroup>();
193        if (tables != null) {
194            if (!(tables instanceof List)) {
195                throw new CatalogInputException(
196                    "SQLDep database '" + dbName + "' field 'tables' must be an array (got "
197                        + typeOf(tables) + ")");
198            }
199            for (Object tobj : (List<?>) tables) {
200                if (!(tobj instanceof Map)) {
201                    throw new CatalogInputException(
202                        "SQLDep database '" + dbName + "' tables[i] must be an object (got "
203                            + typeOf(tobj) + ")");
204                }
205                @SuppressWarnings("unchecked")
206                Map<Object, Object> tmap = (Map<Object, Object>) tobj;
207                String rawSchema = asString(tmap, "schema", false);
208                String schemaName = (rawSchema == null) ? "" : wrapDottedName(rawSchema, vendor);
209                SchemaGroup g = groups.get(schemaName);
210                if (g == null) {
211                    g = new SchemaGroup(schemaName);
212                    groups.put(schemaName, g);
213                }
214                g.addTable(tmap, vendor);
215            }
216        }
217        CatalogModel.Builder cb = CatalogModel.builder().name(dbName);
218        for (SchemaGroup g : groups.values()) {
219            cb.addSchema(g.build());
220        }
221        return cb.build();
222    }
223
224    private static final class SchemaGroup {
225        private final SchemaModel.Builder b;
226
227        SchemaGroup(String name) {
228            this.b = SchemaModel.builder().name(name);
229        }
230
231        void addTable(Map<Object, Object> tmap, EDbVendor vendor)
232                throws CatalogInputException {
233            String tableName = wrapDottedName(asString(tmap, "name", true), vendor);
234            boolean isView = parseStringBoolean(tmap.get("isView"));
235            // Column names are NOT wrapped: ModelBackedCatalogProvider builds the
236            // qualified column form as `<tableQ>.<column>` where <tableQ> already has
237            // any required quoting. A column name with an embedded dot would still be
238            // ambiguous downstream — but the legacy loader has the same gap, so parity
239            // is preserved without the extra wrapping here.
240            List<ColumnModel> cols = parseColumns(tmap, tableName);
241            if (isView) {
242                ViewModel.Builder vb = ViewModel.builder().name(tableName);
243                for (ColumnModel c : cols) {
244                    vb.addColumn(c);
245                }
246                b.addView(vb.build());
247            } else {
248                TableModel.Builder tb = TableModel.builder().name(tableName);
249                for (ColumnModel c : cols) {
250                    tb.addColumn(c);
251                }
252                b.addTable(tb.build());
253            }
254        }
255
256        SchemaModel build() {
257            return b.build();
258        }
259    }
260
261    /**
262     * Wrap a raw identifier with the vendor's open/close quote chars when it contains
263     * a dot. Mirrors the legacy {@code SQLDepSQLEnv} defense: without the wrap, the
264     * downstream string concatenation in {@code ModelBackedCatalogProvider.qualified}
265     * would emit {@code catalog.schema.A.B} for a name {@code "A.B"}, and
266     * {@code CatalogIdentifierPolicy.parse} would split that into four segments —
267     * the table would never resolve. Already-wrapped names (delimiter chars on both
268     * ends) are returned unchanged so a manifest that already quoted its dotted
269     * names round-trips intact.
270     *
271     * <p>The vendor → delimiter-pair mapping is duplicated from
272     * {@code TSQLEnv.delimitedChar}; we keep {@code catalog/**} free of a
273     * compile-time dependency on {@code sqlenv} for layering reasons. If a new
274     * dialect adds a non-{@code "..."} quote rule there, it must also be added here.</p>
275     */
276    private static String wrapDottedName(String raw, EDbVendor vendor) {
277        if (raw == null || raw.isEmpty() || raw.indexOf('.') == -1) {
278            return raw;
279        }
280        char open = vendorOpenDelimiter(vendor);
281        char close = vendorCloseDelimiter(vendor);
282        // If the raw name is already wrapped on both ends with the vendor's pair,
283        // do nothing — the caller already handed us a delimited form and round-trip
284        // through the parser would just nest the delimiters.
285        if (raw.length() >= 2
286            && raw.charAt(0) == open
287            && raw.charAt(raw.length() - 1) == close) {
288            return raw;
289        }
290        return open + raw + close;
291    }
292
293    private static char vendorOpenDelimiter(EDbVendor v) {
294        if (v == null) return '"';
295        switch (v) {
296            case dbvmssql:
297            case dbvazuresql:
298                return '[';
299            case dbvathena:
300            case dbvmysql:
301            case dbvbigquery:
302            case dbvcouchbase:
303            case dbvhive:
304            case dbvimpala:
305            case dbvdatabricks:
306                return '`';
307            case dbvdax:
308                return '\'';
309            default:
310                return '"';
311        }
312    }
313
314    /**
315     * MSSQL/Azure use {@code [name]} (asymmetric); every other vendor uses the same
316     * quote char on both ends. The legacy {@code TSQLEnv.delimitedChar} returns just
317     * the open char and the legacy loader uses it on both sides — that produces an
318     * invalid SQL form ({@code [foo[}) but happens to work for the legacy lookup
319     * because the env stores the literal string as a single key. We use a proper
320     * close-bracket here so the wrapped name is also valid SQL when echoed in
321     * diagnostics; lookups still succeed because {@code IdentifierConfig.stripQuotedDelimiters}
322     * canonicalizes both forms before comparison.
323     */
324    private static char vendorCloseDelimiter(EDbVendor v) {
325        if (v == null) return '"';
326        switch (v) {
327            case dbvmssql:
328            case dbvazuresql:
329                return ']';
330            default:
331                return vendorOpenDelimiter(v);
332        }
333    }
334
335    /**
336     * SQLDep encodes booleans as quoted strings ({@code "true"} / {@code "false"}) — see
337     * {@code SQLDepSQLEnv.initSQLEnv} which calls {@code Boolean.parseBoolean(...)}. We
338     * mirror that: any non-string value tolerates a real Boolean too, and missing ⇒
339     * false (a regular table).
340     */
341    private static boolean parseStringBoolean(Object v) {
342        if (v == null) return false;
343        if (v instanceof Boolean) return (Boolean) v;
344        return Boolean.parseBoolean(v.toString());
345    }
346
347    private static List<ColumnModel> parseColumns(Map<Object, Object> tmap, String tableName)
348            throws CatalogInputException {
349        Object columns = tmap.get("columns");
350        java.util.ArrayList<ColumnModel> out = new java.util.ArrayList<ColumnModel>();
351        if (columns == null) {
352            return out;
353        }
354        if (!(columns instanceof List)) {
355            throw new CatalogInputException(
356                "SQLDep table '" + tableName + "' field 'columns' must be an array (got "
357                    + typeOf(columns) + ")");
358        }
359        for (Object cobj : (List<?>) columns) {
360            if (!(cobj instanceof Map)) {
361                throw new CatalogInputException(
362                    "SQLDep table '" + tableName + "' columns[i] must be an object (got "
363                        + typeOf(cobj) + ")");
364            }
365            @SuppressWarnings("unchecked")
366            Map<Object, Object> cmap = (Map<Object, Object>) cobj;
367            String colName = asString(cmap, "name", true);
368            ColumnModel.Builder cb = ColumnModel.builder().name(colName);
369            String dt = asString(cmap, "dataType", false);
370            if (dt == null) dt = asString(cmap, "type", false);
371            if (dt != null) cb.dataType(dt);
372            out.add(cb.build());
373        }
374        return out;
375    }
376
377    // ---------- defaults / source info ----------
378
379    private static void applyOptionDefaults(UnifiedCatalogModel.Builder mb,
380                                            CatalogLoadOptions options) {
381        if (options == null) return;
382        DefaultsConfig.Builder db = DefaultsConfig.builder();
383        boolean any = false;
384        if (options.defaultCatalog() != null && !options.defaultCatalog().isEmpty()) {
385            db.defaultCatalog(options.defaultCatalog());
386            any = true;
387        }
388        if (options.defaultSchema() != null && !options.defaultSchema().isEmpty()) {
389            db.defaultSchema(options.defaultSchema());
390            any = true;
391        }
392        if (options.defaultServer() != null && !options.defaultServer().isEmpty()) {
393            db.defaultServer(options.defaultServer());
394            any = true;
395        }
396        if (any) {
397            mb.defaults(db.build());
398        }
399    }
400
401    private static CatalogSourceInfo buildSourceInfo(CatalogInputSource source, long startMillis) {
402        return CatalogSourceInfo.builder()
403            .kind(CatalogInputKind.SQLDEP_JSON)
404            .name(source.name() != null ? source.name() : "<sqldep>")
405            .readMillis(System.currentTimeMillis() - startMillis)
406            .build();
407    }
408
409    // ---------- input → string ----------
410
411    private static String readAll(CatalogInputSource source) throws CatalogInputException {
412        try {
413            if (source.inMemoryModel() != null) {
414                throw new CatalogInputException(
415                    "SqldepCatalogInputReader cannot read in-memory model sources");
416            }
417            if (source.path() != null) {
418                byte[] b = Files.readAllBytes(source.path());
419                return new String(b, StandardCharsets.UTF_8);
420            }
421            byte[] sourceBytes = source.bytes();
422            if (sourceBytes != null) {
423                return new String(sourceBytes, StandardCharsets.UTF_8);
424            }
425            if (source.url() != null) {
426                try (InputStream in = source.url().openStream();
427                     Reader r = new InputStreamReader(in, StandardCharsets.UTF_8)) {
428                    return drain(r);
429                }
430            }
431            if (source.reader() != null) {
432                return drain(source.reader());
433            }
434            throw new CatalogInputException(
435                "SqldepCatalogInputReader: source has no readable backing");
436        } catch (IOException io) {
437            throw new CatalogInputException(
438                "Failed to read SQLDep export from " + source.name() + ": " + io.getMessage(),
439                io);
440        }
441    }
442
443    private static String drain(Reader r) throws IOException {
444        BufferedReader br = (r instanceof BufferedReader) ? (BufferedReader) r : new BufferedReader(r);
445        StringBuilder sb = new StringBuilder();
446        char[] buf = new char[4096];
447        int n;
448        while ((n = br.read(buf)) > 0) {
449            sb.append(buf, 0, n);
450        }
451        return sb.toString();
452    }
453
454    // ---------- shared field accessors ----------
455
456    private static String asString(Map<Object, Object> obj, String key, boolean required)
457            throws CatalogInputException {
458        Object v = obj.get(key);
459        if (v == null) {
460            if (required) {
461                throw new CatalogInputException(
462                    "SQLDep entry missing required field '" + key + "'");
463            }
464            return null;
465        }
466        return v instanceof String ? (String) v : v.toString();
467    }
468
469    private static String typeOf(Object o) {
470        return o == null ? "null" : o.getClass().getSimpleName();
471    }
472
473    /** ServiceLoader-discoverable factory. Plan §13.1. */
474    public static final class Factory implements CatalogInputReaderFactory {
475
476        public Factory() {
477            // Required no-arg constructor for ServiceLoader.
478        }
479
480        @Override
481        public CatalogInputKind kind() {
482            return CatalogInputKind.SQLDEP_JSON;
483        }
484
485        @Override
486        public CatalogInputReader create() {
487            return new SqldepCatalogInputReader();
488        }
489    }
490}