001package gudusoft.gsqlparser.ir.semantic.diff; 002 003import java.util.Collections; 004import java.util.LinkedHashMap; 005import java.util.LinkedHashSet; 006import java.util.Map; 007import java.util.Set; 008 009/** 010 * Common comparison form for both Semantic IR and dlineage. See the slice-7 011 * plan for the why. 012 * 013 * <ul> 014 * <li>{@link #getEdges()} — the canonical edge set. SELECT edges have a 015 * non-null {@code outputName}; FILTER/JOIN edges have null.</li> 016 * <li>{@link #getOutputNames()} — every outer output column the side 017 * produced (lower-cased). May contain names that have zero SELECT 018 * edges (aggregate-only outputs such as {@code COUNT(*)}).</li> 019 * <li>{@link #getAggregateByOutput()} — per-output aggregate flag. The 020 * map covers exactly the names in {@link #getOutputNames()}.</li> 021 * </ul> 022 * 023 * <p>All names (output, base table, base column) in this model are 024 * lower-cased. Original casing is preserved by the JSON exporter for 025 * triage but never participates in comparison. 026 */ 027public final class CanonicalLineageModel { 028 029 private final Set<CanonicalLineageEdge> edges; 030 private final Set<String> outputNames; 031 private final Map<String, Boolean> aggregateByOutput; 032 033 public CanonicalLineageModel(Set<CanonicalLineageEdge> edges, 034 Set<String> outputNames, 035 Map<String, Boolean> aggregateByOutput) { 036 if (edges == null || outputNames == null || aggregateByOutput == null) { 037 throw new IllegalArgumentException("edges/outputNames/aggregateByOutput must not be null"); 038 } 039 // Sanity: aggregate map keys must equal outputNames. 040 if (!aggregateByOutput.keySet().equals(outputNames)) { 041 throw new IllegalArgumentException( 042 "aggregateByOutput keys must equal outputNames; outputs=" 043 + outputNames + ", aggregateKeys=" + aggregateByOutput.keySet()); 044 } 045 // Sanity: every SELECT edge's output must be in outputNames. 046 for (CanonicalLineageEdge e : edges) { 047 if (e.getRole() == EdgeRole.SELECT && !outputNames.contains(e.getOutputName())) { 048 throw new IllegalArgumentException( 049 "SELECT edge references unknown output '" + e.getOutputName() 050 + "'; outputs=" + outputNames); 051 } 052 } 053 this.edges = Collections.unmodifiableSet(new LinkedHashSet<>(edges)); 054 this.outputNames = Collections.unmodifiableSet(new LinkedHashSet<>(outputNames)); 055 this.aggregateByOutput = Collections.unmodifiableMap(new LinkedHashMap<>(aggregateByOutput)); 056 } 057 058 public Set<CanonicalLineageEdge> getEdges() { 059 return edges; 060 } 061 062 public Set<String> getOutputNames() { 063 return outputNames; 064 } 065 066 public Map<String, Boolean> getAggregateByOutput() { 067 return aggregateByOutput; 068 } 069 070 /** Empty model used by {@link ProjectorResult} when projection is unsupported. */ 071 public static CanonicalLineageModel empty() { 072 return new CanonicalLineageModel( 073 Collections.<CanonicalLineageEdge>emptySet(), 074 Collections.<String>emptySet(), 075 Collections.<String, Boolean>emptyMap()); 076 } 077}