package gudusoft.gsqlparser.ir.semantic.export;

import gudusoft.gsqlparser.ir.semantic.ColumnRef;
import gudusoft.gsqlparser.ir.semantic.FrameBound;
import gudusoft.gsqlparser.ir.semantic.LineageEdge;
import gudusoft.gsqlparser.ir.semantic.LineageRef;
import gudusoft.gsqlparser.ir.semantic.OutputColumn;
import gudusoft.gsqlparser.ir.semantic.RelationSource;
import gudusoft.gsqlparser.ir.semantic.RowLimit;
import gudusoft.gsqlparser.ir.semantic.SemanticProgram;
import gudusoft.gsqlparser.ir.semantic.SetOperator;
import gudusoft.gsqlparser.ir.semantic.StatementGraph;
import gudusoft.gsqlparser.ir.semantic.TargetRelation;
import gudusoft.gsqlparser.ir.semantic.WindowFrame;
import gudusoft.gsqlparser.ir.semantic.WindowSpec;
import gudusoft.gsqlparser.ir.semantic.binding.RelationBinding;

import java.util.List;

/**
 * Deterministic JSON exporter for {@link SemanticProgram}. Hand-rolled —
 * no reflection, no map ordering surprises, no third-party dependency —
 * so golden files stay byte-stable across JVMs and refactors.
 *
 * <p>Format is pretty-printed with two-space indent and a trailing newline.
 * Field order within an object is fixed by the writer methods, not by any
 * map iteration order.
 *
 * <p>Top-level and per-statement keys are written one per line; the
 * elements of the nested arrays (relations, output columns, column refs,
 * lineage edges) are written as single-line objects.
 */
public final class SemanticIRJsonExporter {

    /** One level of indentation: two spaces, as promised by the class contract. */
    private static final String INDENT = "  ";

    /**
     * JSON schema version emitted at the top of every exported
     * program. Slice 75 freezes this at {@code "1"}; future breaking
     * changes to the shape increment this value. Consumers can
     * branch on the value to handle multiple shapes if needed.
     *
     * <p>Initialised in a static block (rather than as a literal
     * compile-time constant) so binary consumers compiled against
     * one version of this library don't silently see the old value
     * after a drop-in JAR upgrade. The trade-off is a slightly
     * verbose declaration; the upside is a durable version contract
     * (codex diff-review round-1 Q3).
     */
    public static final String SCHEMA_VERSION;

    static {
        SCHEMA_VERSION = "1";
    }

    /** Static utility class; not instantiable. */
    private SemanticIRJsonExporter() {}

    /**
     * Serialises {@code program} to pretty-printed JSON.
     *
     * <p>The top-level object has exactly three keys, in this order:
     * {@code schemaVersion}, {@code statements}, {@code lineage}. The
     * returned text ends with a single newline.
     *
     * @param program the program to export; must not be null
     * @return the JSON document
     * @throws IllegalArgumentException if {@code program} is null
     */
    public static String toJson(SemanticProgram program) {
        if (program == null) {
            throw new IllegalArgumentException("program must not be null");
        }
        StringBuilder sb = new StringBuilder();
        sb.append("{\n");

        // Slice 75: schemaVersion is the first key in every exported
        // program so consumers can short-circuit on it without parsing
        // the full payload.
        writeKey(sb, 1, "schemaVersion");
        writeString(sb, SCHEMA_VERSION);
        sb.append(",\n");

        writeKey(sb, 1, "statements");
        sb.append("[");
        List<StatementGraph> stmts = program.getStatements();
        if (!stmts.isEmpty()) {
            sb.append("\n");
            int last = stmts.size() - 1;
            for (int i = 0; i <= last; i++) {
                writeStatement(sb, 2, stmts.get(i));
                if (i < last) {
                    sb.append(",");
                }
                sb.append("\n");
            }
            indent(sb, 1);
        }
        sb.append("],\n");

        writeKey(sb, 1, "lineage");
        writeLineage(sb, 1, program.getLineage());
        sb.append("\n}\n");
        return sb.toString();
    }

    /**
     * Writes one statement object at the given depth, one key per line.
     *
     * <p>{@code name} is emitted only when non-null; every other key is
     * always emitted, in the fixed order below. No trailing newline or
     * comma is written after the closing brace — the caller adds those.
     */
    private static void writeStatement(StringBuilder sb, int depth, StatementGraph s) {
        final int inner = depth + 1;
        indent(sb, depth);
        sb.append("{\n");

        if (s.getName() != null) {
            writeKey(sb, inner, "name");
            writeString(sb, s.getName());
            sb.append(",\n");
        }

        writeKey(sb, inner, "kind");
        writeString(sb, s.getKind());
        sb.append(",\n");

        writeKey(sb, inner, "distinct");
        sb.append(s.isDistinct() ? "true" : "false");
        sb.append(",\n");

        writeKey(sb, inner, "setOperator");
        writeNullableString(sb, s.getSetOperator() == null
                ? null : s.getSetOperator().name());
        sb.append(",\n");

        writeKey(sb, inner, "rowLimit");
        writeRowLimit(sb, s.getRowLimit());
        sb.append(",\n");

        writeKey(sb, inner, "target");
        writeTarget(sb, s.getTarget());
        sb.append(",\n");

        writeKey(sb, inner, "relations");
        writeRelations(sb, inner, s.getRelations());
        sb.append(",\n");

        writeKey(sb, inner, "outputColumns");
        writeOutputColumns(sb, inner, s.getOutputColumns());
        sb.append(",\n");

        writeKey(sb, inner, "returningColumns");
        writeOutputColumns(sb, inner, s.getReturningColumns());
        sb.append(",\n");

        writeKey(sb, inner, "filterColumnRefs");
        writeColumnRefArray(sb, inner, s.getFilterColumnRefs());
        sb.append(",\n");

        writeKey(sb, inner, "joinColumnRefs");
        writeColumnRefArray(sb, inner, s.getJoinColumnRefs());
        sb.append(",\n");

        writeKey(sb, inner, "groupByColumnRefs");
        writeColumnRefArray(sb, inner, s.getGroupByColumnRefs());
        sb.append(",\n");

        writeKey(sb, inner, "havingColumnRefs");
        writeColumnRefArray(sb, inner, s.getHavingColumnRefs());
        sb.append(",\n");

        writeKey(sb, inner, "orderByColumnRefs");
        writeColumnRefArray(sb, inner, s.getOrderByColumnRefs());
        sb.append(",\n");

        // Last key: no trailing comma.
        writeKey(sb, inner, "distinctOnColumnRefs");
        writeColumnRefArray(sb, inner, s.getDistinctOnColumnRefs());
        sb.append("\n");

        indent(sb, depth);
        sb.append("}");
    }

    /**
     * Writes the {@code relations} array: {@code []} when empty, otherwise
     * one {@code {"alias": ..., "binding": {...}}} object per line at
     * {@code depth + 1}, with the closing bracket at {@code depth}.
     */
    private static void writeRelations(StringBuilder sb, int depth, List<RelationSource> rels) {
        if (rels.isEmpty()) {
            sb.append("[]");
            return;
        }
        sb.append("[\n");
        int last = rels.size() - 1;
        for (int i = 0; i <= last; i++) {
            RelationSource r = rels.get(i);
            indent(sb, depth + 1);
            sb.append("{");
            writeKeyInline(sb, "alias");
            writeString(sb, r.getAlias());
            sb.append(", ");
            writeKeyInline(sb, "binding");
            writeBinding(sb, r.getBinding());
            sb.append("}");
            if (i < last) {
                sb.append(",");
            }
            sb.append("\n");
        }
        indent(sb, depth);
        sb.append("]");
    }

    /**
     * Writes a relation binding as a single-line object with {@code kind}
     * and {@code qualifiedName}; {@code outerKind} is appended only when
     * the binding reports a non-null outer kind.
     */
    private static void writeBinding(StringBuilder sb, RelationBinding b) {
        sb.append("{");
        writeKeyInline(sb, "kind");
        writeString(sb, b.getKind().name());
        sb.append(", ");
        writeKeyInline(sb, "qualifiedName");
        writeString(sb, b.getQualifiedName());
        if (b.getOuterKind() != null) {
            sb.append(", ");
            writeKeyInline(sb, "outerKind");
            writeString(sb, b.getOuterKind().name());
        }
        sb.append("}");
    }

    /**
     * Writes an output-column array (used for both {@code outputColumns}
     * and {@code returningColumns}): {@code []} when empty, otherwise one
     * single-line object per column with the keys {@code name},
     * {@code derived}, {@code aggregate}, {@code window}, {@code sources}.
     */
    private static void writeOutputColumns(StringBuilder sb, int depth, List<OutputColumn> cols) {
        if (cols.isEmpty()) {
            sb.append("[]");
            return;
        }
        sb.append("[\n");
        int last = cols.size() - 1;
        for (int i = 0; i <= last; i++) {
            OutputColumn c = cols.get(i);
            indent(sb, depth + 1);
            sb.append("{");
            writeKeyInline(sb, "name");
            writeString(sb, c.getName());
            sb.append(", ");
            writeKeyInline(sb, "derived");
            sb.append(c.isDerived() ? "true" : "false");
            sb.append(", ");
            writeKeyInline(sb, "aggregate");
            sb.append(c.isAggregate() ? "true" : "false");
            sb.append(", ");
            writeKeyInline(sb, "window");
            writeWindowSpec(sb, c.getWindowSpec());
            sb.append(", ");
            writeKeyInline(sb, "sources");
            writeColumnRefArrayInline(sb, c.getSources());
            sb.append("}");
            if (i < last) {
                sb.append(",");
            }
            sb.append("\n");
        }
        indent(sb, depth);
        sb.append("]");
    }

    /**
     * Slice 13: write the per-output {@code window} field. Emits the JSON
     * literal {@code null} when {@code spec} is null (slice-8 always-emit
     * shape-stability rule), or {@code {"partitionRefs": [...], "orderRefs":
     * [...], "frame": null|{...}}} otherwise. Both inner arrays are always
     * emitted; one may be empty when only PARTITION BY or only OVER
     * ORDER BY is present. The {@code frame} key (slice 22) is also
     * always emitted — null when no frame, an object otherwise.
     */
    private static void writeWindowSpec(StringBuilder sb, WindowSpec spec) {
        if (spec == null) {
            sb.append("null");
            return;
        }
        sb.append("{");
        writeKeyInline(sb, "partitionRefs");
        writeColumnRefArrayInline(sb, spec.getPartitionRefs());
        sb.append(", ");
        writeKeyInline(sb, "orderRefs");
        writeColumnRefArrayInline(sb, spec.getOrderRefs());
        sb.append(", ");
        writeKeyInline(sb, "frame");
        writeWindowFrame(sb, spec.getFrame());
        sb.append("}");
    }

    /**
     * Slice 22: write the {@code window.frame} field. Emits {@code null}
     * when no frame, otherwise {@code {"unit": "ROWS|RANGE|GROUPS",
     * "start": {...}, "end": null|{...}}}. {@code end} is the literal
     * null when the surface SQL used the unary form
     * ({@code ROWS UNBOUNDED PRECEDING}).
     */
    private static void writeWindowFrame(StringBuilder sb, WindowFrame frame) {
        if (frame == null) {
            sb.append("null");
            return;
        }
        sb.append("{");
        writeKeyInline(sb, "unit");
        writeString(sb, frame.getUnit().name());
        sb.append(", ");
        writeKeyInline(sb, "start");
        writeFrameBound(sb, frame.getStart());
        sb.append(", ");
        writeKeyInline(sb, "end");
        writeFrameBound(sb, frame.getEnd());
        sb.append("}");
    }

    /**
     * Slice 22: write a {@link FrameBound}. Emits {@code null} when the
     * bound itself is null (the {@code end} of a unary frame), otherwise
     * {@code {"kind": "...", "offsetLiteral": null|"..."}}.
     * {@code offsetLiteral} is presentation text — it captures the
     * SQL author's literal spelling and is NOT canonical across
     * vendors.
     */
    private static void writeFrameBound(StringBuilder sb, FrameBound bound) {
        if (bound == null) {
            sb.append("null");
            return;
        }
        sb.append("{");
        writeKeyInline(sb, "kind");
        writeString(sb, bound.getKind().name());
        sb.append(", ");
        writeKeyInline(sb, "offsetLiteral");
        writeNullableString(sb, bound.getOffsetLiteral());
        sb.append("}");
    }

    /**
     * Slices 70 and 71: write the per-statement {@code rowLimit} field.
     * Emits the JSON literal {@code null} when {@code rowLimit} is null
     * (mirrors the slice-12 {@code setOperator} always-emit shape-
     * stability rule), or
     * {@code {"kind": "<kind>", "count": "<verbatim>|null",
     * "offset": "<verbatim>|null"}} otherwise.
     *
     * <p>{@code kind} is one of {@code LIMIT}, {@code FETCH_FIRST},
     * {@code TOP}, or {@code OFFSET_FETCH}. {@code count} is normally
     * a string but may be the JSON literal {@code null} when
     * {@code kind == "OFFSET_FETCH"} and the SQL author wrote
     * offset-only (e.g. PG {@code OFFSET 5} or Oracle
     * {@code OFFSET 5 ROWS} without {@code FETCH NEXT}). {@code offset}
     * is the JSON literal {@code null} when no offset is present.
     */
    private static void writeRowLimit(StringBuilder sb, RowLimit rl) {
        if (rl == null) {
            sb.append("null");
            return;
        }
        sb.append("{");
        writeKeyInline(sb, "kind");
        writeString(sb, rl.getKind().name());
        sb.append(", ");
        writeKeyInline(sb, "count");
        writeNullableString(sb, rl.getCount());
        sb.append(", ");
        writeKeyInline(sb, "offset");
        writeNullableString(sb, rl.getOffset());
        sb.append("}");
    }

    /**
     * Slice 78: write the per-statement {@code target} field for
     * {@code INSERT INTO target SELECT ...} statements. Emits the JSON
     * literal {@code null} when {@code target} is null (mirrors the
     * always-emit shape-stability rule used by {@code setOperator} /
     * {@code rowLimit}), or
     * {@code {"table": "<qualified>", "columns": ["c1", "c2", ...]}}
     * otherwise. The {@code columns} list is empty when the SQL author
     * omitted the INSERT column list — consumers should fall back to the
     * source SELECT's positional output names for the per-column lineage
     * mapping.
     */
    private static void writeTarget(StringBuilder sb, TargetRelation t) {
        if (t == null) {
            sb.append("null");
            return;
        }
        sb.append("{");
        writeKeyInline(sb, "table");
        writeString(sb, t.getBinding().getQualifiedName());
        sb.append(", ");
        writeKeyInline(sb, "columns");
        sb.append("[");
        List<String> cols = t.getColumns();
        for (int i = 0; i < cols.size(); i++) {
            if (i > 0) {
                sb.append(", ");
            }
            writeString(sb, cols.get(i));
        }
        sb.append("]");
        sb.append("}");
    }

    /**
     * Writes a multi-line column-ref array: {@code []} when empty,
     * otherwise one single-line ref object per line at {@code depth + 1},
     * with the closing bracket at {@code depth}.
     */
    private static void writeColumnRefArray(StringBuilder sb, int depth, List<ColumnRef> refs) {
        if (refs.isEmpty()) {
            sb.append("[]");
            return;
        }
        sb.append("[\n");
        int last = refs.size() - 1;
        for (int i = 0; i <= last; i++) {
            indent(sb, depth + 1);
            writeColumnRefInline(sb, refs.get(i));
            if (i < last) {
                sb.append(",");
            }
            sb.append("\n");
        }
        indent(sb, depth);
        sb.append("]");
    }

    /**
     * Writes a column-ref array on a single line, elements separated by
     * {@code ", "}. An empty list yields {@code []}.
     */
    private static void writeColumnRefArrayInline(StringBuilder sb, List<ColumnRef> refs) {
        sb.append("[");
        for (int i = 0; i < refs.size(); i++) {
            if (i > 0) {
                sb.append(", ");
            }
            writeColumnRefInline(sb, refs.get(i));
        }
        sb.append("]");
    }

    /** Writes {@code {"relationAlias": "...", "columnName": "..."}} on one line. */
    private static void writeColumnRefInline(StringBuilder sb, ColumnRef r) {
        sb.append("{");
        writeKeyInline(sb, "relationAlias");
        writeString(sb, r.getRelationAlias());
        sb.append(", ");
        writeKeyInline(sb, "columnName");
        writeString(sb, r.getColumnName());
        sb.append("}");
    }

    /**
     * Writes the {@code lineage} array: {@code []} when empty, otherwise
     * one single-line edge object per line at {@code depth + 1}, with the
     * closing bracket at {@code depth}.
     */
    private static void writeLineage(StringBuilder sb, int depth, List<LineageEdge> edges) {
        if (edges.isEmpty()) {
            sb.append("[]");
            return;
        }
        sb.append("[\n");
        int last = edges.size() - 1;
        for (int i = 0; i <= last; i++) {
            indent(sb, depth + 1);
            writeLineageEdgeInline(sb, edges.get(i));
            if (i < last) {
                sb.append(",");
            }
            sb.append("\n");
        }
        indent(sb, depth);
        sb.append("]");
    }

    /** Writes {@code {"from": {...}, "to": {...}}} on one line. */
    private static void writeLineageEdgeInline(StringBuilder sb, LineageEdge e) {
        sb.append("{");
        writeKeyInline(sb, "from");
        writeLineageRefInline(sb, e.getFrom());
        sb.append(", ");
        writeKeyInline(sb, "to");
        writeLineageRefInline(sb, e.getTo());
        sb.append("}");
    }

    /**
     * Writes one lineage endpoint on a single line. {@code kind} is always
     * first; the remaining keys depend on it:
     * {@code statementIndex} + {@code outputName} for
     * {@code STATEMENT_OUTPUT}, {@code qualifiedName} + {@code columnName}
     * for {@code TABLE_COLUMN}.
     */
    private static void writeLineageRefInline(StringBuilder sb, LineageRef ref) {
        sb.append("{");
        writeKeyInline(sb, "kind");
        writeString(sb, ref.getKind().name());
        switch (ref.getKind()) {
            case STATEMENT_OUTPUT:
                sb.append(", ");
                writeKeyInline(sb, "statementIndex");
                sb.append(ref.getStatementIndex());
                sb.append(", ");
                writeKeyInline(sb, "outputName");
                writeString(sb, ref.getOutputName());
                break;
            case TABLE_COLUMN:
                sb.append(", ");
                writeKeyInline(sb, "qualifiedName");
                writeString(sb, ref.getQualifiedName());
                sb.append(", ");
                writeKeyInline(sb, "columnName");
                writeString(sb, ref.getColumnName());
                break;
            default:
                // Any other kind is exported with its "kind" key only.
                break;
        }
        sb.append("}");
    }

    /** Indents to {@code depth}, then writes {@code "key": } (with the trailing space). */
    private static void writeKey(StringBuilder sb, int depth, String key) {
        indent(sb, depth);
        writeKeyInline(sb, key);
    }

    /** Writes {@code "key": } (with the trailing space) at the current position. */
    private static void writeKeyInline(StringBuilder sb, String key) {
        sb.append('"').append(escape(key)).append("\": ");
    }

    /**
     * Writes {@code value} as a quoted, escaped JSON string.
     * {@code value} must not be null; use {@link #writeNullableString}
     * for fields that may be absent.
     */
    private static void writeString(StringBuilder sb, String value) {
        sb.append('"').append(escape(value)).append('"');
    }

    /**
     * Write a quoted string when {@code value} is non-null, or the JSON
     * literal {@code null} when it is. Used for nullable scalar fields
     * like {@code setOperator} (slice 12) where the absence of a value
     * is itself meaningful.
     */
    private static void writeNullableString(StringBuilder sb, String value) {
        if (value == null) {
            sb.append("null");
        } else {
            writeString(sb, value);
        }
    }

    /** Appends {@code depth} copies of {@link #INDENT}. */
    private static void indent(StringBuilder sb, int depth) {
        for (int i = 0; i < depth; i++) {
            sb.append(INDENT);
        }
    }

    /**
     * Escapes {@code s} for use inside a JSON string literal.
     *
     * <p>The double quote, the backslash and the five control characters
     * with short forms (newline, carriage return, tab, backspace, form
     * feed) use their two-character escapes. Every other character below
     * 0x20 is written as a six-character Unicode escape with four
     * lowercase hex digits. All remaining characters pass through
     * unchanged.
     *
     * @param s the raw text; must not be null
     * @return the escaped text, without surrounding quotes
     */
    private static String escape(String s) {
        StringBuilder out = new StringBuilder(s.length() + 2);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '"':
                    out.append("\\\"");
                    break;
                case '\\':
                    out.append("\\\\");
                    break;
                case '\n':
                    out.append("\\n");
                    break;
                case '\r':
                    out.append("\\r");
                    break;
                case '\t':
                    out.append("\\t");
                    break;
                case '\b':
                    out.append("\\b");
                    break;
                case '\f':
                    out.append("\\f");
                    break;
                default:
                    if (c < 0x20) {
                        // Only values 0x00-0x1f reach here, so the two high
                        // hex digits are always zero; Character.forDigit
                        // yields lowercase digits, so the output does not
                        // depend on any locale or formatter state.
                        out.append("\\u00")
                                .append(Character.forDigit(c >> 4, 16))
                                .append(Character.forDigit(c & 0xF, 16));
                    } else {
                        out.append(c);
                    }
                    break;
            }
        }
        return out.toString();
    }
}