001package gudusoft.gsqlparser.ir.semantic.diff; 002 003import java.util.ArrayList; 004import java.util.Collections; 005import java.util.HashMap; 006import java.util.HashSet; 007import java.util.LinkedHashSet; 008import java.util.List; 009import java.util.Map; 010import java.util.Set; 011 012/** 013 * Pure comparison: two {@link CanonicalLineageModel}s in, one 014 * {@link DivergenceReport} out. Pass order matters and is documented 015 * inline below — see the slice-7 plan §"Comparison passes". 016 */ 017public final class DivergenceReporter { 018 019 private DivergenceReporter() {} 020 021 /** 022 * @param sqlName human-readable name (e.g. "01_alias_collision") 023 * @param ir result of projecting the Semantic IR 024 * @param dlineage result of projecting the dlineage XML 025 */ 026 public static DivergenceReport report(String sqlName, ProjectorResult ir, ProjectorResult dlineage) { 027 if (sqlName == null || sqlName.isEmpty()) { 028 throw new IllegalArgumentException("sqlName must be non-empty"); 029 } 030 if (ir == null || dlineage == null) { 031 throw new IllegalArgumentException("ir and dlineage results must not be null"); 032 } 033 034 // Unsupported-side short-circuit. We still produce both models in the 035 // report (empty for the failing side) so the JSON shape is uniform. 036 List<Divergence> divs = new ArrayList<>(); 037 if (!ir.isSupported()) { 038 divs.add(new Divergence(DivergenceClass.UNSUPPORTED_BY_IR, Divergence.QUERY_WIDE, 039 detailFor(ir))); 040 } 041 if (!dlineage.isSupported()) { 042 divs.add(new Divergence(DivergenceClass.UNSUPPORTED_BY_DLINEAGE, Divergence.QUERY_WIDE, 043 detailFor(dlineage))); 044 } 045 if (!ir.isSupported() || !dlineage.isSupported()) { 046 // When either side is unsupported the per-edge passes don't make 047 // sense; the query-wide divergence is the only one reported. 048 return new DivergenceReport(sqlName, ir.getModel(), dlineage.getModel(), 049 sortDivergences(divs)); 050 } 051 052 CanonicalLineageModel a = ir.getModel(); 053 CanonicalLineageModel b = dlineage.getModel(); 054 055 // Pass 1: output-presence symmetric difference. 056 // Drop SELECT edges for one-sided outputs so later passes don't double-count. 057 Set<String> aOnly = new LinkedHashSet<>(a.getOutputNames()); 058 aOnly.removeAll(b.getOutputNames()); 059 Set<String> bOnly = new LinkedHashSet<>(b.getOutputNames()); 060 bOnly.removeAll(a.getOutputNames()); 061 for (String n : aOnly) { 062 divs.add(new Divergence(DivergenceClass.IR_EXTRA_DEPENDENCY, n, 063 Divergence.DETAIL_OUTPUT_PRESENT)); 064 } 065 for (String n : bOnly) { 066 divs.add(new Divergence(DivergenceClass.IR_MISSING_DEPENDENCY, n, 067 Divergence.DETAIL_OUTPUT_PRESENT)); 068 } 069 070 Set<String> commonOutputs = new HashSet<>(a.getOutputNames()); 071 commonOutputs.retainAll(b.getOutputNames()); 072 073 Set<CanonicalLineageEdge> aEdges = filterEdgesForCommonOutputs(a.getEdges(), commonOutputs); 074 Set<CanonicalLineageEdge> bEdges = filterEdgesForCommonOutputs(b.getEdges(), commonOutputs); 075 076 // Pass 2: aggregation pass over common outputs. 077 for (String name : new java.util.TreeSet<>(commonOutputs)) { 078 Boolean ia = a.getAggregateByOutput().get(name); 079 Boolean da = b.getAggregateByOutput().get(name); 080 if (ia == null || da == null) continue; 081 if (!ia.equals(da)) { 082 divs.add(new Divergence(DivergenceClass.AGGREGATION_MISMATCH, name, 083 "ir=" + ia + ",dlineage=" + da)); 084 } 085 } 086 087 // Working sets we'll whittle down with binding/role passes. 088 Set<CanonicalLineageEdge> aRemaining = new LinkedHashSet<>(aEdges); 089 Set<CanonicalLineageEdge> bRemaining = new LinkedHashSet<>(bEdges); 090 091 // Pass 3: binding mismatch. 092 // Group by (role, outputName, baseColumn). If one side has exactly 093 // one edge and the other has exactly one edge with a different 094 // baseTable, that's a single binding mismatch. Drop both edges. 095 Map<String, List<CanonicalLineageEdge>> aByBindKey = groupBy(aRemaining, DivergenceReporter::bindingKey); 096 Map<String, List<CanonicalLineageEdge>> bByBindKey = groupBy(bRemaining, DivergenceReporter::bindingKey); 097 for (String key : new java.util.TreeSet<>(union(aByBindKey.keySet(), bByBindKey.keySet()))) { 098 List<CanonicalLineageEdge> al = aByBindKey.getOrDefault(key, Collections.emptyList()); 099 List<CanonicalLineageEdge> bl = bByBindKey.getOrDefault(key, Collections.emptyList()); 100 if (al.size() == 1 && bl.size() == 1) { 101 CanonicalLineageEdge ae = al.get(0); 102 CanonicalLineageEdge be = bl.get(0); 103 if (!ae.getBaseTable().equals(be.getBaseTable())) { 104 divs.add(new Divergence(DivergenceClass.BINDING_MISMATCH, 105 ae.getOutputName(), 106 ae.getRole() + " " + ae.getBaseColumn() 107 + " ir=" + ae.getBaseTable() 108 + ",dlineage=" + be.getBaseTable())); 109 aRemaining.remove(ae); 110 bRemaining.remove(be); 111 } 112 } 113 } 114 115 // Pass 4: role mismatch (FILTER vs JOIN). 116 // Group by (outputName, baseTable, baseColumn) without role. 117 Map<String, List<CanonicalLineageEdge>> aByRoleKey = groupBy(aRemaining, DivergenceReporter::roleAgnosticKey); 118 Map<String, List<CanonicalLineageEdge>> bByRoleKey = groupBy(bRemaining, DivergenceReporter::roleAgnosticKey); 119 for (String key : new java.util.TreeSet<>(union(aByRoleKey.keySet(), bByRoleKey.keySet()))) { 120 List<CanonicalLineageEdge> al = aByRoleKey.getOrDefault(key, Collections.emptyList()); 121 List<CanonicalLineageEdge> bl = bByRoleKey.getOrDefault(key, Collections.emptyList()); 122 if (al.size() == 1 && bl.size() == 1) { 123 CanonicalLineageEdge ae = al.get(0); 124 CanonicalLineageEdge be = bl.get(0); 125 if (ae.getRole() != be.getRole()) { 126 divs.add(new Divergence(DivergenceClass.FILTER_OR_JOIN_SCOPE_MISMATCH, 127 ae.getOutputName(), 128 ae.getBaseTable() + "." + ae.getBaseColumn() 129 + " ir=" + ae.getRole() + ",dlineage=" + be.getRole())); 130 aRemaining.remove(ae); 131 bRemaining.remove(be); 132 } 133 } 134 } 135 136 // Pass 5: symmetric difference. Whatever is left is a flat 137 // missing/extra divergence. Sort by canonical edge order so 138 // emission is deterministic. 139 List<CanonicalLineageEdge> aLeft = new ArrayList<>(aRemaining); 140 aLeft.removeAll(bRemaining); 141 List<CanonicalLineageEdge> bLeft = new ArrayList<>(bRemaining); 142 bLeft.removeAll(aRemaining); 143 Collections.sort(aLeft, CanonicalLineageEdge.ORDER); 144 Collections.sort(bLeft, CanonicalLineageEdge.ORDER); 145 for (CanonicalLineageEdge e : aLeft) { 146 divs.add(new Divergence(DivergenceClass.IR_EXTRA_DEPENDENCY, 147 e.getOutputName(), edgeDetail(e))); 148 } 149 for (CanonicalLineageEdge e : bLeft) { 150 divs.add(new Divergence(DivergenceClass.IR_MISSING_DEPENDENCY, 151 e.getOutputName(), edgeDetail(e))); 152 } 153 154 return new DivergenceReport(sqlName, a, b, sortDivergences(divs)); 155 } 156 157 private static String detailFor(ProjectorResult r) { 158 StringBuilder sb = new StringBuilder(r.getReason().name()); 159 if (r.getDetail() != null && !r.getDetail().isEmpty()) { 160 sb.append(": ").append(r.getDetail()); 161 } 162 return sb.toString(); 163 } 164 165 private static Set<CanonicalLineageEdge> filterEdgesForCommonOutputs( 166 Set<CanonicalLineageEdge> edges, Set<String> commonOutputs) { 167 // SELECT edges keyed by an output not in both sides → drop (already 168 // accounted for by output-presence pass). FILTER/JOIN edges have null 169 // outputName and always pass through. 170 Set<CanonicalLineageEdge> out = new LinkedHashSet<>(); 171 for (CanonicalLineageEdge e : edges) { 172 if (e.getRole() == EdgeRole.SELECT && !commonOutputs.contains(e.getOutputName())) continue; 173 out.add(e); 174 } 175 return out; 176 } 177 178 private static String edgeDetail(CanonicalLineageEdge e) { 179 return e.getRole() + " " + e.getBaseTable() + "." + e.getBaseColumn(); 180 } 181 182 private static String bindingKey(CanonicalLineageEdge e) { 183 // Include role so SELECT bindings don't merge with FILTER bindings 184 // for the same base column. 185 return e.getRole().name() + "" 186 + (e.getOutputName() == null ? "" : e.getOutputName()) + "" 187 + e.getBaseColumn(); 188 } 189 190 private static String roleAgnosticKey(CanonicalLineageEdge e) { 191 return (e.getOutputName() == null ? "" : e.getOutputName()) + "" 192 + e.getBaseTable() + "" 193 + e.getBaseColumn(); 194 } 195 196 private static Map<String, List<CanonicalLineageEdge>> groupBy( 197 Set<CanonicalLineageEdge> edges, 198 java.util.function.Function<CanonicalLineageEdge, String> keyFn) { 199 Map<String, List<CanonicalLineageEdge>> out = new HashMap<>(); 200 for (CanonicalLineageEdge e : edges) { 201 out.computeIfAbsent(keyFn.apply(e), k -> new ArrayList<>()).add(e); 202 } 203 return out; 204 } 205 206 private static Set<String> union(Set<String> a, Set<String> b) { 207 Set<String> out = new HashSet<>(a); 208 out.addAll(b); 209 return out; 210 } 211 212 private static List<Divergence> sortDivergences(List<Divergence> divs) { 213 List<Divergence> sorted = new ArrayList<>(divs); 214 Collections.sort(sorted, Divergence.ORDER); 215 return sorted; 216 } 217}