package gudusoft.gsqlparser.catalog.input.readers;

import gudusoft.gsqlparser.EDbVendor;
import gudusoft.gsqlparser.catalog.input.CatalogInputException;
import gudusoft.gsqlparser.catalog.input.CatalogInputKind;
import gudusoft.gsqlparser.catalog.input.CatalogInputReader;
import gudusoft.gsqlparser.catalog.input.CatalogInputReaderFactory;
import gudusoft.gsqlparser.catalog.input.CatalogInputSource;
import gudusoft.gsqlparser.catalog.input.CatalogLoadOptions;
import gudusoft.gsqlparser.catalog.input.model.CatalogModel;
import gudusoft.gsqlparser.catalog.input.model.CatalogSourceInfo;
import gudusoft.gsqlparser.catalog.input.model.ColumnModel;
import gudusoft.gsqlparser.catalog.input.model.DefaultsConfig;
import gudusoft.gsqlparser.catalog.input.model.SchemaModel;
import gudusoft.gsqlparser.catalog.input.model.TableModel;
import gudusoft.gsqlparser.catalog.input.model.UnifiedCatalogModel;
import gudusoft.gsqlparser.catalog.input.model.ViewModel;
import gudusoft.gsqlparser.util.json.JSON;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Reader for {@link CatalogInputKind#SQLDEP_JSON} sources — the JSON shape the legacy
 * {@code SQLDepSQLEnv} consumes (plan §11 T2.A.4 / first Phase 2.A static-file adapter).
 *
 * <p>Per plan §6 / §11.2 this reader funnels the SQLDep export shape into the same
 * {@link UnifiedCatalogModel} the JSON-manifest reader uses; the eager bridge then
 * materializes a {@code TSQLEnv} byte-for-byte equivalent to what the legacy
 * {@code SQLDepSQLEnv} would build (parity test in {@code SqldepLegacyParityTest}).
 * The legacy path remains untouched per plan §8.5.</p>
 *
 * <p>SQLDep export schema (key fields):</p>
 * <pre>{@code
 * {
 *   "createdBy": "sqldep ...",        // optional but conventional; not required
 *   "databaseModel": { ... },         // optional outer wrapper
 *   "databases": [
 *     {
 *       "name": "ORCL",
 *       "tables": [
 *         {
 *           "schema": "HR",           // per-table schema label
 *           "name": "EMPLOYEES",
 *           "isView": "false",        // optional, string boolean
 *           "columns": [{"name": "..."}, ...]
 *         }
 *       ]
 *     }
 *   ]
 * }
 * }</pre>
 *
 * <p>The reader is dispatch-by-{@link CatalogInputKind#SQLDEP_JSON} (callers tag the
 * source explicitly). Auto-claim by {@code createdBy} content marker is intentionally
 * not implemented here — that dispatch already lives in the legacy
 * {@code TJSONSQLEnvParser} and reproducing it inside a {@code supports()} pre-scan
 * would force every SQLDep candidate source to be drained twice.</p>
 *
 * <p>Identifier names with embedded dots are wrapped with the vendor's quote
 * character before storage. Without the wrap, {@code ModelBackedCatalogProvider}
 * later concatenates segments with {@code .} into qualified strings (e.g.
 * {@code catalog.schema.A.B}) that {@code CatalogIdentifierPolicy.parse} would
 * split into four segments instead of three, and the runtime would never resolve
 * the SQL reference {@code "A.B"}. The legacy {@code SQLDepSQLEnv} carries the
 * same defense — the delimiter chars come from {@code TSQLEnv.delimitedChar} —
 * and we duplicate that table here so {@code catalog/**} stays free of a
 * compile-time dependency on the {@code sqlenv} runtime layer.
 * {@code CatalogModelValidator} still surfaces an identifier-bypass WARN when
 * the produced names disagree with {@code IdentifierService.normalize}, exactly
 * as for the JSON-manifest reader.</p>
 */
public final class SqldepCatalogInputReader implements CatalogInputReader {

    /**
     * Label used for a source whose {@code name()} is {@code null}, both in the
     * produced {@link CatalogSourceInfo} and in error messages.
     */
    private static final String UNNAMED_SOURCE = "<sqldep>";

    /** Unicode byte-order mark; Java's UTF-8 decoding keeps it as a leading char. */
    private static final char BYTE_ORDER_MARK = '\uFEFF';

    public SqldepCatalogInputReader() {
    }

    /** @return always {@link CatalogInputKind#SQLDEP_JSON} */
    @Override
    public CatalogInputKind kind() {
        return CatalogInputKind.SQLDEP_JSON;
    }

    /**
     * Claims a source only when the caller tagged it as SQLDep JSON and it is not
     * backed by an in-memory model. No content sniffing is performed.
     *
     * @param source  candidate source; {@code null} is never supported
     * @param options load options (not consulted)
     * @return {@code true} when this reader should handle {@code source}
     */
    @Override
    public boolean supports(CatalogInputSource source, CatalogLoadOptions options) {
        if (source == null || source.inMemoryModel() != null) {
            return false;
        }
        return source.declaredKind() == CatalogInputKind.SQLDEP_JSON;
    }

    /**
     * Reads a SQLDep JSON export and converts it into a {@link UnifiedCatalogModel}.
     *
     * @param source  where the export text comes from; must not be {@code null}
     * @param options optional load options supplying the vendor and the default
     *                catalog / schema / server; when absent (or without a vendor) the
     *                vendor falls back to Oracle
     * @return the model built from the {@code databases} array; a model without
     *         catalogs when that field is absent
     * @throws CatalogInputException when the source is missing or unreadable, the text
     *         is not valid JSON, a field has the wrong JSON type, a required field is
     *         missing, or a model builder rejects a value
     */
    @Override
    public UnifiedCatalogModel read(CatalogInputSource source, CatalogLoadOptions options)
            throws CatalogInputException {
        if (source == null) {
            throw new CatalogInputException("SqldepCatalogInputReader: source is required");
        }
        final EDbVendor vendor = (options != null && options.vendor() != null)
                ? options.vendor()
                : EDbVendor.dbvoracle; // SQLDep exports historically default to Oracle

        // The clock starts before the read; buildSourceInfo() below stops it, so the
        // recorded readMillis covers reading and JSON parsing but not the model walk.
        long start = System.currentTimeMillis();
        Map<Object, Object> root = parseRoot(readAll(source), source);
        Map<Object, Object> body = unwrapDatabaseModel(root);

        try {
            UnifiedCatalogModel.Builder mb = UnifiedCatalogModel.builder().vendor(vendor);
            applyOptionDefaults(mb, options);
            mb.sourceInfo(buildSourceInfo(source, start));

            Object databases = body.get("databases");
            if (databases == null) {
                // Empty SQLDep export — return a vendor-only model. Validator gives a
                // WARN downstream if defaults are missing; structural emptiness here is
                // legal (and matches SQLDepSQLEnv's silent no-op when "databases" is null).
                return mb.build();
            }
            if (!(databases instanceof List)) {
                throw new CatalogInputException(
                        "SQLDep field 'databases' must be an array (got " + typeOf(databases) + ")");
            }
            for (Object dobj : (List<?>) databases) {
                if (!(dobj instanceof Map)) {
                    throw new CatalogInputException(
                            "SQLDep databases[i] must be an object (got " + typeOf(dobj) + ")");
                }
                CatalogModel catalog = parseDatabase(asObjectMap(dobj), vendor);
                if (catalog != null) {
                    mb.addCatalog(catalog);
                }
            }
            return mb.build();
        } catch (IllegalArgumentException ex) {
            // Model builders enforce structural invariants (non-empty names, etc.) by
            // throwing IllegalArgumentException. Surface those through the reader's
            // checked-exception channel so callers using try/catch on
            // CatalogInputException don't get caught off guard.
            throw new CatalogInputException(
                    "Malformed SQLDep export from " + sourceLabel(source) + ": " + ex.getMessage(),
                    ex);
        }
    }

    // ---------- root parsing ----------

    /**
     * Parses the export text and checks that the JSON root is an object.
     *
     * @param text   full export text
     * @param source originating source, used only to label parse errors
     * @return the root JSON object
     * @throws CatalogInputException when parsing fails or the root is not an object
     */
    private static Map<Object, Object> parseRoot(String text, CatalogInputSource source)
            throws CatalogInputException {
        Object parsed;
        try {
            parsed = JSON.parseObject(text);
        } catch (RuntimeException ex) {
            throw new CatalogInputException(
                    "Failed to parse SQLDep JSON from " + sourceLabel(source) + ": "
                            + ex.getMessage(),
                    ex);
        }
        if (!(parsed instanceof Map)) {
            throw new CatalogInputException(
                    "SQLDep root must be an object (got " + typeOf(parsed) + ")");
        }
        return asObjectMap(parsed);
    }

    /**
     * Resolves the object that carries the {@code databases} array: the optional
     * {@code databaseModel} wrapper when present, otherwise the root itself. Mirrors
     * {@code SQLDepSQLEnv.initSQLEnv()}.
     *
     * @throws CatalogInputException when {@code databaseModel} is present but is not
     *         an object
     */
    private static Map<Object, Object> unwrapDatabaseModel(Map<Object, Object> root)
            throws CatalogInputException {
        Object dm = root.get("databaseModel");
        if (dm == null) {
            return root;
        }
        if (!(dm instanceof Map)) {
            throw new CatalogInputException(
                    "SQLDep field 'databaseModel' must be an object (got " + typeOf(dm) + ")");
        }
        return asObjectMap(dm);
    }

    // ---------- catalog / schema / table parsing ----------

    /**
     * Walk one {@code databases[i]} entry into a {@link CatalogModel}. SQLDep stores the
     * schema name on each table, so we group tables by their {@code schema} field before
     * building {@link SchemaModel}s — preserves insertion order so the round-trip with
     * {@code SQLEnvCatalogLoader} carries the same ordering as the legacy loader.
     *
     * <p>Catalog / schema / table names are wrapped with the vendor's quote char when
     * they contain a dot — see {@link #wrapDottedName} for the rationale. A table
     * without a {@code schema} field is grouped under the empty schema name.</p>
     *
     * @param dmap   one element of the {@code databases} array
     * @param vendor dialect used to pick the quote characters
     * @return the catalog with one schema per distinct {@code schema} label
     * @throws CatalogInputException when {@code name} is missing or {@code tables} /
     *         one of its elements has the wrong JSON type
     */
    private CatalogModel parseDatabase(Map<Object, Object> dmap, EDbVendor vendor)
            throws CatalogInputException {
        String dbName = wrapDottedName(asString(dmap, "name", true), vendor);
        Object tables = dmap.get("tables");
        // LinkedHashMap: schemas are emitted in first-seen order.
        Map<String, SchemaGroup> groups = new LinkedHashMap<String, SchemaGroup>();
        if (tables != null) {
            if (!(tables instanceof List)) {
                throw new CatalogInputException(
                        "SQLDep database '" + dbName + "' field 'tables' must be an array (got "
                                + typeOf(tables) + ")");
            }
            for (Object tobj : (List<?>) tables) {
                if (!(tobj instanceof Map)) {
                    throw new CatalogInputException(
                            "SQLDep database '" + dbName + "' tables[i] must be an object (got "
                                    + typeOf(tobj) + ")");
                }
                Map<Object, Object> tmap = asObjectMap(tobj);
                String rawSchema = asString(tmap, "schema", false);
                String schemaName = (rawSchema == null) ? "" : wrapDottedName(rawSchema, vendor);
                SchemaGroup g = groups.get(schemaName);
                if (g == null) {
                    g = new SchemaGroup(schemaName);
                    groups.put(schemaName, g);
                }
                g.addTable(tmap, vendor);
            }
        }
        CatalogModel.Builder cb = CatalogModel.builder().name(dbName);
        for (SchemaGroup g : groups.values()) {
            cb.addSchema(g.build());
        }
        return cb.build();
    }

    /** Accumulates the tables and views that share one {@code schema} label. */
    private static final class SchemaGroup {
        private final SchemaModel.Builder b;

        SchemaGroup(String name) {
            this.b = SchemaModel.builder().name(name);
        }

        /**
         * Adds one {@code tables[i]} entry as a view when its {@code isView} flag is
         * true, otherwise as a table.
         *
         * @throws CatalogInputException when {@code name} is missing or the
         *         {@code columns} field is malformed
         */
        void addTable(Map<Object, Object> tmap, EDbVendor vendor)
                throws CatalogInputException {
            String tableName = wrapDottedName(asString(tmap, "name", true), vendor);
            boolean isView = parseStringBoolean(tmap.get("isView"));
            // Column names are NOT wrapped: ModelBackedCatalogProvider builds the
            // qualified column form as `<tableQ>.<column>` where <tableQ> already has
            // any required quoting. A column name with an embedded dot would still be
            // ambiguous downstream — but the legacy loader has the same gap, so parity
            // is preserved without the extra wrapping here.
            List<ColumnModel> cols = parseColumns(tmap, tableName);
            if (isView) {
                ViewModel.Builder vb = ViewModel.builder().name(tableName);
                for (ColumnModel c : cols) {
                    vb.addColumn(c);
                }
                b.addView(vb.build());
            } else {
                TableModel.Builder tb = TableModel.builder().name(tableName);
                for (ColumnModel c : cols) {
                    tb.addColumn(c);
                }
                b.addTable(tb.build());
            }
        }

        SchemaModel build() {
            return b.build();
        }
    }

    /**
     * Wrap a raw identifier with the vendor's open/close quote chars when it contains
     * a dot. Mirrors the legacy {@code SQLDepSQLEnv} defense: without the wrap, the
     * downstream string concatenation in {@code ModelBackedCatalogProvider.qualified}
     * would emit {@code catalog.schema.A.B} for a name {@code "A.B"}, and
     * {@code CatalogIdentifierPolicy.parse} would split that into four segments —
     * the table would never resolve. Already-wrapped names (delimiter chars on both
     * ends) are returned unchanged so a manifest that already quoted its dotted
     * names round-trips intact.
     *
     * <p>The vendor → delimiter-pair mapping is duplicated from
     * {@code TSQLEnv.delimitedChar}; we keep {@code catalog/**} free of a
     * compile-time dependency on {@code sqlenv} for layering reasons. If a new
     * dialect adds a non-{@code "..."} quote rule there, it must also be added here.</p>
     *
     * @param raw    identifier as found in the export; may be {@code null}
     * @param vendor dialect used to pick the quote characters; may be {@code null}
     * @return {@code raw} unchanged when it is {@code null}, empty, dot-free or already
     *         delimited; otherwise {@code raw} surrounded by the vendor's quote chars
     */
    private static String wrapDottedName(String raw, EDbVendor vendor) {
        if (raw == null || raw.isEmpty() || raw.indexOf('.') == -1) {
            return raw;
        }
        char open = vendorOpenDelimiter(vendor);
        char close = vendorCloseDelimiter(vendor);
        // If the raw name is already wrapped on both ends with the vendor's pair,
        // do nothing — the caller already handed us a delimited form and round-trip
        // through the parser would just nest the delimiters.
        if (raw.length() >= 2
                && raw.charAt(0) == open
                && raw.charAt(raw.length() - 1) == close) {
            return raw;
        }
        // char + String is string concatenation, so this yields open, raw, close.
        return open + raw + close;
    }

    /**
     * @param v dialect; {@code null} is treated like any unlisted vendor
     * @return the opening identifier-quote char: {@code [} for MSSQL/Azure SQL, a
     *         backtick for the MySQL-style dialects listed below, a single quote for
     *         DAX, and a double quote otherwise
     */
    private static char vendorOpenDelimiter(EDbVendor v) {
        if (v == null) {
            return '"';
        }
        switch (v) {
            case dbvmssql:
            case dbvazuresql:
                return '[';
            case dbvathena:
            case dbvmysql:
            case dbvbigquery:
            case dbvcouchbase:
            case dbvhive:
            case dbvimpala:
            case dbvdatabricks:
                return '`';
            case dbvdax:
                return '\'';
            default:
                return '"';
        }
    }

    /**
     * MSSQL/Azure use {@code [name]} (asymmetric); every other vendor uses the same
     * quote char on both ends. The legacy {@code TSQLEnv.delimitedChar} returns just
     * the open char and the legacy loader uses it on both sides — that produces an
     * invalid SQL form ({@code [foo[}) but happens to work for the legacy lookup
     * because the env stores the literal string as a single key. We use a proper
     * close-bracket here so the wrapped name is also valid SQL when echoed in
     * diagnostics; lookups still succeed because {@code IdentifierConfig.stripQuotedDelimiters}
     * canonicalizes both forms before comparison.
     */
    private static char vendorCloseDelimiter(EDbVendor v) {
        if (v == null) {
            return '"';
        }
        switch (v) {
            case dbvmssql:
            case dbvazuresql:
                return ']';
            default:
                return vendorOpenDelimiter(v);
        }
    }

    /**
     * SQLDep encodes booleans as quoted strings ({@code "true"} / {@code "false"}) — see
     * {@code SQLDepSQLEnv.initSQLEnv} which calls {@code Boolean.parseBoolean(...)}. We
     * mirror that: a real {@link Boolean} is accepted as-is, any other value is parsed
     * from its {@code toString()} form, and missing ⇒ false (a regular table).
     */
    private static boolean parseStringBoolean(Object v) {
        if (v == null) {
            return false;
        }
        if (v instanceof Boolean) {
            return (Boolean) v;
        }
        return Boolean.parseBoolean(v.toString());
    }

    /**
     * Converts the {@code columns} array of one table entry into column models. The
     * data type is taken from {@code dataType}, falling back to {@code type}; a column
     * with neither gets no data type.
     *
     * @param tmap      the table entry
     * @param tableName table name, used only in error messages
     * @return the columns in export order; empty when {@code columns} is absent
     * @throws CatalogInputException when {@code columns} or one of its elements has
     *         the wrong JSON type, or a column lacks {@code name}
     */
    private static List<ColumnModel> parseColumns(Map<Object, Object> tmap, String tableName)
            throws CatalogInputException {
        Object columns = tmap.get("columns");
        List<ColumnModel> out = new java.util.ArrayList<ColumnModel>();
        if (columns == null) {
            return out;
        }
        if (!(columns instanceof List)) {
            throw new CatalogInputException(
                    "SQLDep table '" + tableName + "' field 'columns' must be an array (got "
                            + typeOf(columns) + ")");
        }
        for (Object cobj : (List<?>) columns) {
            if (!(cobj instanceof Map)) {
                throw new CatalogInputException(
                        "SQLDep table '" + tableName + "' columns[i] must be an object (got "
                                + typeOf(cobj) + ")");
            }
            Map<Object, Object> cmap = asObjectMap(cobj);
            String colName = asString(cmap, "name", true);
            ColumnModel.Builder cb = ColumnModel.builder().name(colName);
            String dt = asString(cmap, "dataType", false);
            if (dt == null) {
                dt = asString(cmap, "type", false);
            }
            if (dt != null) {
                cb.dataType(dt);
            }
            out.add(cb.build());
        }
        return out;
    }

    // ---------- defaults / source info ----------

    /**
     * Copies the non-empty default catalog / schema / server from {@code options}
     * onto the model builder. Leaves the builder untouched when {@code options} is
     * {@code null} or supplies none of them.
     */
    private static void applyOptionDefaults(UnifiedCatalogModel.Builder mb,
                                            CatalogLoadOptions options) {
        if (options == null) {
            return;
        }
        DefaultsConfig.Builder db = DefaultsConfig.builder();
        boolean any = false;
        if (options.defaultCatalog() != null && !options.defaultCatalog().isEmpty()) {
            db.defaultCatalog(options.defaultCatalog());
            any = true;
        }
        if (options.defaultSchema() != null && !options.defaultSchema().isEmpty()) {
            db.defaultSchema(options.defaultSchema());
            any = true;
        }
        if (options.defaultServer() != null && !options.defaultServer().isEmpty()) {
            db.defaultServer(options.defaultServer());
            any = true;
        }
        if (any) {
            mb.defaults(db.build());
        }
    }

    /**
     * Builds the provenance record for the model.
     *
     * @param source      originating source
     * @param startMillis {@code System.currentTimeMillis()} value captured before the
     *                    source was read; the elapsed time up to this call is stored
     *                    as {@code readMillis}
     */
    private static CatalogSourceInfo buildSourceInfo(CatalogInputSource source, long startMillis) {
        return CatalogSourceInfo.builder()
                .kind(CatalogInputKind.SQLDEP_JSON)
                .name(sourceLabel(source))
                .readMillis(System.currentTimeMillis() - startMillis)
                .build();
    }

    /**
     * @return the source's name, or {@code "<sqldep>"} when it has none, so error
     *         messages and {@link CatalogSourceInfo} agree and never print
     *         {@code null}
     */
    private static String sourceLabel(CatalogInputSource source) {
        String name = source.name();
        return name != null ? name : UNNAMED_SOURCE;
    }

    // ---------- input → string ----------

    /**
     * Loads the whole export as text, decoding bytes as UTF-8, and drops one leading
     * byte-order mark if present.
     *
     * @throws CatalogInputException when the source is an in-memory model, has no
     *         readable backing, or an I/O error occurs
     */
    private static String readAll(CatalogInputSource source) throws CatalogInputException {
        try {
            return stripByteOrderMark(readRaw(source));
        } catch (IOException io) {
            throw new CatalogInputException(
                    "Failed to read SQLDep export from " + sourceLabel(source) + ": "
                            + io.getMessage(),
                    io);
        }
    }

    /**
     * Reads the undecorated text from the first available backing, checked in the
     * order path, bytes, URL, reader. A caller-supplied reader is drained but not
     * closed here.
     */
    private static String readRaw(CatalogInputSource source)
            throws IOException, CatalogInputException {
        if (source.inMemoryModel() != null) {
            throw new CatalogInputException(
                    "SqldepCatalogInputReader cannot read in-memory model sources");
        }
        if (source.path() != null) {
            byte[] b = Files.readAllBytes(source.path());
            return new String(b, StandardCharsets.UTF_8);
        }
        byte[] sourceBytes = source.bytes();
        if (sourceBytes != null) {
            return new String(sourceBytes, StandardCharsets.UTF_8);
        }
        if (source.url() != null) {
            try (InputStream in = source.url().openStream();
                 Reader r = new InputStreamReader(in, StandardCharsets.UTF_8)) {
                return drain(r);
            }
        }
        if (source.reader() != null) {
            return drain(source.reader());
        }
        throw new CatalogInputException(
                "SqldepCatalogInputReader: source has no readable backing");
    }

    /**
     * Removes a single leading U+FEFF. Java's UTF-8 decoding does not discard the
     * byte-order mark some editors and export tools prepend, and a JSON document
     * cannot legitimately start with that character, so dropping it is lossless.
     */
    private static String stripByteOrderMark(String text) {
        if (text != null && !text.isEmpty() && text.charAt(0) == BYTE_ORDER_MARK) {
            return text.substring(1);
        }
        return text;
    }

    /**
     * Reads {@code r} to exhaustion. The reader is wrapped in a
     * {@link BufferedReader} unless it already is one; it is not closed here.
     */
    private static String drain(Reader r) throws IOException {
        BufferedReader br = (r instanceof BufferedReader)
                ? (BufferedReader) r
                : new BufferedReader(r);
        StringBuilder sb = new StringBuilder();
        char[] buf = new char[4096];
        int n;
        while ((n = br.read(buf)) > 0) {
            sb.append(buf, 0, n);
        }
        return sb.toString();
    }

    // ---------- shared field accessors ----------

    /**
     * Fetches a field as text; non-string values are converted with {@code toString()}.
     *
     * @param obj      JSON object to read from
     * @param key      field name
     * @param required whether a missing (or JSON-null) field is an error
     * @return the field text, or {@code null} when absent and not required
     * @throws CatalogInputException when the field is absent and {@code required}
     */
    private static String asString(Map<Object, Object> obj, String key, boolean required)
            throws CatalogInputException {
        Object v = obj.get(key);
        if (v == null) {
            if (required) {
                throw new CatalogInputException(
                        "SQLDep entry missing required field '" + key + "'");
            }
            return null;
        }
        return v instanceof String ? (String) v : v.toString();
    }

    /**
     * Views a parsed JSON value as an object map. Callers must already have verified
     * {@code value instanceof Map}; the key/value types are not checked, which is why
     * the cast is unchecked.
     */
    @SuppressWarnings("unchecked")
    private static Map<Object, Object> asObjectMap(Object value) {
        return (Map<Object, Object>) value;
    }

    /** @return the simple class name of {@code o}, or {@code "null"}, for messages */
    private static String typeOf(Object o) {
        return o == null ? "null" : o.getClass().getSimpleName();
    }

    /** ServiceLoader-discoverable factory. Plan §13.1. */
    public static final class Factory implements CatalogInputReaderFactory {

        public Factory() {
            // Required no-arg constructor for ServiceLoader.
        }

        @Override
        public CatalogInputKind kind() {
            return CatalogInputKind.SQLDEP_JSON;
        }

        @Override
        public CatalogInputReader create() {
            return new SqldepCatalogInputReader();
        }
    }
}