001package gudusoft.gsqlparser.ir.semantic.diff;
002
003import java.util.Collections;
004import java.util.LinkedHashMap;
005import java.util.LinkedHashSet;
006import java.util.Map;
007import java.util.Set;
008
009/**
010 * Common comparison form for both Semantic IR and dlineage. See the slice-7
011 * plan for the why.
012 *
013 * <ul>
014 *   <li>{@link #getEdges()} — the canonical edge set. SELECT edges have a
015 *       non-null {@code outputName}; FILTER/JOIN edges have null.</li>
016 *   <li>{@link #getOutputNames()} — every outer output column the side
017 *       produced (lower-cased). May contain names that have zero SELECT
018 *       edges (aggregate-only outputs such as {@code COUNT(*)}).</li>
019 *   <li>{@link #getAggregateByOutput()} — per-output aggregate flag. The
020 *       map covers exactly the names in {@link #getOutputNames()}.</li>
021 * </ul>
022 *
023 * <p>All names (output, base table, base column) in this model are
024 * lower-cased. Original casing is preserved by the JSON exporter for
025 * triage but never participates in comparison.
026 */
027public final class CanonicalLineageModel {
028
029    private final Set<CanonicalLineageEdge> edges;
030    private final Set<String> outputNames;
031    private final Map<String, Boolean> aggregateByOutput;
032
033    public CanonicalLineageModel(Set<CanonicalLineageEdge> edges,
034                                 Set<String> outputNames,
035                                 Map<String, Boolean> aggregateByOutput) {
036        if (edges == null || outputNames == null || aggregateByOutput == null) {
037            throw new IllegalArgumentException("edges/outputNames/aggregateByOutput must not be null");
038        }
039        // Sanity: aggregate map keys must equal outputNames.
040        if (!aggregateByOutput.keySet().equals(outputNames)) {
041            throw new IllegalArgumentException(
042                    "aggregateByOutput keys must equal outputNames; outputs="
043                            + outputNames + ", aggregateKeys=" + aggregateByOutput.keySet());
044        }
045        // Sanity: every SELECT edge's output must be in outputNames.
046        for (CanonicalLineageEdge e : edges) {
047            if (e.getRole() == EdgeRole.SELECT && !outputNames.contains(e.getOutputName())) {
048                throw new IllegalArgumentException(
049                        "SELECT edge references unknown output '" + e.getOutputName()
050                                + "'; outputs=" + outputNames);
051            }
052        }
053        this.edges = Collections.unmodifiableSet(new LinkedHashSet<>(edges));
054        this.outputNames = Collections.unmodifiableSet(new LinkedHashSet<>(outputNames));
055        this.aggregateByOutput = Collections.unmodifiableMap(new LinkedHashMap<>(aggregateByOutput));
056    }
057
058    public Set<CanonicalLineageEdge> getEdges() {
059        return edges;
060    }
061
062    public Set<String> getOutputNames() {
063        return outputNames;
064    }
065
066    public Map<String, Boolean> getAggregateByOutput() {
067        return aggregateByOutput;
068    }
069
070    /** Empty model used by {@link ProjectorResult} when projection is unsupported. */
071    public static CanonicalLineageModel empty() {
072        return new CanonicalLineageModel(
073                Collections.<CanonicalLineageEdge>emptySet(),
074                Collections.<String>emptySet(),
075                Collections.<String, Boolean>emptyMap());
076    }
077}