Source code

001package gudusoft.gsqlparser.ir.semantic;
002
003import gudusoft.gsqlparser.ir.semantic.joinanalysis.JoinAnalysisFacts;
004import gudusoft.gsqlparser.ir.semantic.joinanalysis.JoinGraph;
005
006import java.util.ArrayList;
007import java.util.Collections;
008import java.util.List;
009
010/**
011 * Semantic shape of one SQL statement. Currently covers SELECT.
012 *
013 * <p>{@link #name} is non-null when this statement is the body of a named
014 * CTE or a FROM-clause subquery. For top-level outer SELECTs it is null.
015 *
016 * <p>{@link #filterColumnRefs}, {@link #joinColumnRefs},
017 * {@link #groupByColumnRefs}, {@link #havingColumnRefs}, and
018 * {@link #orderByColumnRefs} are flat lists of column references that
019 * appear in the WHERE, JOIN predicate (ON / USING), GROUP BY, HAVING,
020 * and ORDER BY clauses respectively. For {@code JOIN ... USING (k)}
021 * (slice 64) {@code joinColumnRefs} contains one ref per
022 * (relation, key) pair on both sides — left side first via
023 * catalog-aware narrowing, then the right side. The IR deliberately
024 * does <i>not</i> model structured
025 * {@code Filter}, {@code Join}, or {@code GroupBy} nodes with predicate
026 * trees yet; later slices will add them. Listing the affected columns
027 * is enough to answer the roadmap's questions about
028 * filter/join/grouping/having/ordering influence.
029 *
030 * <p>{@link #groupingElements} (slice 128) is the structured companion to
031 * the flat {@link #groupByColumnRefs}: one {@link GroupingElement} per
032 * top-level {@code GROUP BY} item, preserving the {@code SIMPLE} /
033 * {@code ROLLUP} / {@code CUBE} / {@code GROUPING SETS} structure the flat
034 * list discards. See {@link #getGroupingElements()}.
035 *
036 * <p>{@link #orderByColumnRefs} only ever contains references to physical
037 * (base or in-statement) columns. Ordinal references ({@code ORDER BY 1})
038 * and bare-constant sort keys are rejected by the builder — emitting
039 * {@code []} for them would lose the dependency information silently.
040 *
041 * <p>Slice 9 (single-SELECT) rejects projection-alias references like
042 * {@code SELECT id AS x ... ORDER BY x}. Slice 21 (set-op outer)
043 * <i>accepts</i> alias references positionally against branch[0]'s
044 * outputs — the alias IS the set-op output schema. The two paths
045 * diverge intentionally; see
046 * {@code SemanticIRBuilder.buildOrderByColumnRefs} (slice 9) versus
047 * {@code SemanticIRBuilder.buildSetOpOuterOrderByColumnRefs} (slice 21).
048 */
049public final class StatementGraph {
050
051    private final String name; // optional label; the constructor stores "" as null
052    private final String kind; // statement kind tag; the constructor rejects null and ""
053    private final List<RelationSource> relations; // each List field holds an unmodifiable copy of its constructor argument
054    private final List<OutputColumn> outputColumns;
055    private final List<OutputColumn> returningColumns; // empty when built through an overload that omits this slot
056    private final List<ColumnRef> filterColumnRefs; // column refs appearing in WHERE
057    private final List<ColumnRef> joinColumnRefs; // column refs appearing in JOIN predicates (ON / USING)
058    private final List<ColumnRef> groupByColumnRefs; // flat column refs appearing in GROUP BY
059    private final List<ColumnRef> havingColumnRefs; // column refs appearing in HAVING
060    private final List<ColumnRef> orderByColumnRefs; // column refs appearing in ORDER BY
061    private final List<ColumnRef> distinctOnColumnRefs; // NOTE(review): presumably DISTINCT ON refs; not described in the visible source
062    private final List<ColumnRef> qualifyColumnRefs; // column refs appearing in QUALIFY
063    private final List<GroupingElement> groupingElements; // structured view of GROUP BY, one element per top-level item
064    private final List<ColumnRef> pivotColumnRefs; // columns consumed by a PIVOT operator
065    private final boolean distinct; // the PIVOT overloads pass false
066    private final SetOperator setOperator; // may be null; the PIVOT overloads pass null
067    private final RowLimit rowLimit; // may be null; the PIVOT overloads pass null
068    private final TargetRelation target; // may be null; the SELECT-shaped overloads pass null
069    /**
070     * Join-analysis facts (slice 167, GAP 1/2/4): the single optional
071     * carrier holding the structured {@link JoinGraph}, the WHERE filter
072     * predicates, and the query-block scope. Never null — defaults to
073     * {@link JoinAnalysisFacts#EMPTY} for every legacy constructor.
074     */
075    private final JoinAnalysisFacts joinAnalysisFacts;
076    /**
077     * Optional block-level source span (slice 179, R5) covering this
078     * statement's own text. Null when not set (e.g. DML / set-op paths that
079     * do not thread the parse node). Read by {@code attachQueryBlockScopes}
080     * to populate {@link QueryBlockScope#getSourceSpan()}.
081     */
082    private final SourceSpan sourceSpan;
083
084    /**
085     * Slice 129 primary constructor — adds the optional
086     * {@code pivotColumnRefs} slot, the columns CONSUMED by a {@code PIVOT}
087     * operator (the {@code FOR} / pivot column(s) followed by the
088     * aggregation-function argument column(s)). The slot is always non-null
089     * (use {@link Collections#emptyList()} when absent); non-empty only on a
090     * SELECT whose FROM source is a {@code PIVOT}. All other slots are
091     * unchanged. See {@link #getPivotColumnRefs()}.
092     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns, List<OutputColumn> returningColumns,
        List<ColumnRef> filterColumnRefs, List<ColumnRef> joinColumnRefs,
        List<ColumnRef> groupByColumnRefs, List<ColumnRef> havingColumnRefs,
        List<ColumnRef> orderByColumnRefs, List<ColumnRef> distinctOnColumnRefs,
        List<ColumnRef> qualifyColumnRefs,
        List<GroupingElement> groupingElements,
        List<ColumnRef> pivotColumnRefs,
        boolean distinct, SetOperator setOperator, RowLimit rowLimit,
        TargetRelation target) {
    // Pure forwarder to the 19-arg overload: all eighteen slots pass through
    // untouched and the join-analysis slot is filled with the shared EMPTY carrier.
    this(name,
            kind,
            relations,
            outputColumns,
            returningColumns,
            filterColumnRefs,
            joinColumnRefs,
            groupByColumnRefs,
            havingColumnRefs,
            orderByColumnRefs,
            distinctOnColumnRefs,
            qualifyColumnRefs,
            groupingElements,
            pivotColumnRefs,
            distinct,
            setOperator,
            rowLimit,
            target,
            JoinAnalysisFacts.EMPTY);   // joinAnalysisFacts
}
118
119    /**
120     * Slice 167 primary constructor — adds the single optional
121     * {@link JoinAnalysisFacts} carrier (GAP 1/2/4). Every prior
122     * constructor delegates here with {@link JoinAnalysisFacts#EMPTY}, so
123     * the additive slot does not grow the constructor surface per GAP. A
124     * null {@code joinAnalysisFacts} coalesces to {@code EMPTY}.
125     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns, List<OutputColumn> returningColumns,
        List<ColumnRef> filterColumnRefs, List<ColumnRef> joinColumnRefs,
        List<ColumnRef> groupByColumnRefs, List<ColumnRef> havingColumnRefs,
        List<ColumnRef> orderByColumnRefs, List<ColumnRef> distinctOnColumnRefs,
        List<ColumnRef> qualifyColumnRefs,
        List<GroupingElement> groupingElements,
        List<ColumnRef> pivotColumnRefs,
        boolean distinct, SetOperator setOperator, RowLimit rowLimit,
        TargetRelation target,
        JoinAnalysisFacts joinAnalysisFacts) {
    // Pure forwarder to the 20-arg constructor: every slot passes through
    // untouched and the block-level source span is left unset (null).
    this(name,
            kind,
            relations,
            outputColumns,
            returningColumns,
            filterColumnRefs,
            joinColumnRefs,
            groupByColumnRefs,
            havingColumnRefs,
            orderByColumnRefs,
            distinctOnColumnRefs,
            qualifyColumnRefs,
            groupingElements,
            pivotColumnRefs,
            distinct,
            setOperator,
            rowLimit,
            target,
            joinAnalysisFacts,
            null);                      // sourceSpan
}
150
151    /**
152     * Slice 179 (R5) primary constructor — adds the optional block-level
153     * {@link SourceSpan} covering this statement's own source text. Every
154     * prior constructor delegates here with a null span (additive). Set at
155     * the SELECT construction site (where the parse node is in scope) and
156     * read by {@code attachQueryBlockScopes} so each {@link QueryBlockScope}
157     * carries an honest span.
158     */
159    public StatementGraph(String name,
160                          String kind,
161                          List<RelationSource> relations,
162                          List<OutputColumn> outputColumns,
163                          List<OutputColumn> returningColumns,
164                          List<ColumnRef> filterColumnRefs,
165                          List<ColumnRef> joinColumnRefs,
166                          List<ColumnRef> groupByColumnRefs,
167                          List<ColumnRef> havingColumnRefs,
168                          List<ColumnRef> orderByColumnRefs,
169                          List<ColumnRef> distinctOnColumnRefs,
170                          List<ColumnRef> qualifyColumnRefs,
171                          List<GroupingElement> groupingElements,
172                          List<ColumnRef> pivotColumnRefs,
173                          boolean distinct,
174                          SetOperator setOperator,
175                          RowLimit rowLimit,
176                          TargetRelation target,
177                          JoinAnalysisFacts joinAnalysisFacts,
178                          SourceSpan sourceSpan) {
179        if (kind == null || kind.isEmpty()) {
180            throw new IllegalArgumentException("kind must be non-empty");
181        }
182        if (relations == null || outputColumns == null
183                || returningColumns == null
184                || filterColumnRefs == null || joinColumnRefs == null
185                || groupByColumnRefs == null || havingColumnRefs == null
186                || orderByColumnRefs == null || distinctOnColumnRefs == null
187                || qualifyColumnRefs == null || groupingElements == null
188                || pivotColumnRefs == null) {
189            throw new IllegalArgumentException(
190                    "relations/outputColumns/returningColumns/filterColumnRefs/joinColumnRefs/"
191                            + "groupByColumnRefs/havingColumnRefs/orderByColumnRefs/"
192                            + "distinctOnColumnRefs/qualifyColumnRefs/groupingElements/"
193                            + "pivotColumnRefs must not be null");
194        }
195        this.name = (name != null && name.isEmpty()) ? null : name;
196        this.kind = kind;
197        this.relations = Collections.unmodifiableList(new ArrayList<>(relations));
198        this.outputColumns = Collections.unmodifiableList(new ArrayList<>(outputColumns));
199        this.returningColumns = Collections.unmodifiableList(new ArrayList<>(returningColumns));
200        this.filterColumnRefs = Collections.unmodifiableList(new ArrayList<>(filterColumnRefs));
201        this.joinColumnRefs = Collections.unmodifiableList(new ArrayList<>(joinColumnRefs));
202        this.groupByColumnRefs = Collections.unmodifiableList(new ArrayList<>(groupByColumnRefs));
203        this.havingColumnRefs = Collections.unmodifiableList(new ArrayList<>(havingColumnRefs));
204        this.orderByColumnRefs = Collections.unmodifiableList(new ArrayList<>(orderByColumnRefs));
205        this.distinctOnColumnRefs = Collections.unmodifiableList(new ArrayList<>(distinctOnColumnRefs));
206        this.qualifyColumnRefs = Collections.unmodifiableList(new ArrayList<>(qualifyColumnRefs));
207        this.groupingElements = Collections.unmodifiableList(new ArrayList<>(groupingElements));
208        this.pivotColumnRefs = Collections.unmodifiableList(new ArrayList<>(pivotColumnRefs));
209        this.distinct = distinct;
210        this.setOperator = setOperator;
211        this.rowLimit = rowLimit;
212        this.target = target;
213        this.joinAnalysisFacts =
214                joinAnalysisFacts == null ? JoinAnalysisFacts.EMPTY : joinAnalysisFacts;
215        this.sourceSpan = sourceSpan;
216    }
217
218    /**
219     * Return a copy of this statement with its {@link JoinAnalysisFacts}
220     * replaced. Used by the builder (slices 167/168/169/170) to attach
221     * join/predicate/scope facts after the flat graph is built, without
222     * mutating this immutable value. Preserves the block {@link #sourceSpan}.
223     */
224    public StatementGraph withJoinAnalysisFacts(JoinAnalysisFacts facts) {
225        return new StatementGraph(name, kind, relations, outputColumns, returningColumns,
226                filterColumnRefs, joinColumnRefs, groupByColumnRefs, havingColumnRefs,
227                orderByColumnRefs, distinctOnColumnRefs, qualifyColumnRefs, groupingElements,
228                pivotColumnRefs, distinct, setOperator, rowLimit, target, facts, sourceSpan);
229    }
230
231    /**
232     * Return a copy of this statement with its block-level
233     * {@link #sourceSpan} replaced (slice 179, R5). Preserves the
234     * {@link JoinAnalysisFacts}.
235     */
236    public StatementGraph withSourceSpan(SourceSpan newSourceSpan) {
237        return new StatementGraph(name, kind, relations, outputColumns, returningColumns,
238                filterColumnRefs, joinColumnRefs, groupByColumnRefs, havingColumnRefs,
239                orderByColumnRefs, distinctOnColumnRefs, qualifyColumnRefs, groupingElements,
240                pivotColumnRefs, distinct, setOperator, rowLimit, target,
241                joinAnalysisFacts, newSourceSpan);
242    }
243
244    /**
245     * Slice 128 primary constructor preserved — adds the optional
246     * {@code groupingElements} slot, the structured per-top-level-element
247     * view of the {@code GROUP BY} ({@code SIMPLE} / {@code ROLLUP} /
248     * {@code CUBE} / {@code GROUPING SETS}; see {@link GroupingElement}).
249     * Delegates to the slice-129 primary with empty {@code pivotColumnRefs}.
250     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns, List<OutputColumn> returningColumns,
        List<ColumnRef> filterColumnRefs, List<ColumnRef> joinColumnRefs,
        List<ColumnRef> groupByColumnRefs, List<ColumnRef> havingColumnRefs,
        List<ColumnRef> orderByColumnRefs, List<ColumnRef> distinctOnColumnRefs,
        List<ColumnRef> qualifyColumnRefs,
        List<GroupingElement> groupingElements,
        boolean distinct, SetOperator setOperator, RowLimit rowLimit,
        TargetRelation target) {
    // Forward to the 18-arg overload; this shape has no PIVOT slot, so an
    // empty list stands in for pivotColumnRefs.
    this(name,
            kind,
            relations,
            outputColumns,
            returningColumns,
            filterColumnRefs,
            joinColumnRefs,
            groupByColumnRefs,
            havingColumnRefs,
            orderByColumnRefs,
            distinctOnColumnRefs,
            qualifyColumnRefs,
            groupingElements,
            Collections.<ColumnRef>emptyList(),       // pivotColumnRefs
            distinct,
            setOperator,
            rowLimit,
            target);
}
275
276    /**
277     * Slice 125 primary constructor preserved — adds the optional
278     * {@code qualifyColumnRefs} slot for the {@code QUALIFY} clause
279     * (Snowflake / BigQuery / Teradata). Delegates to the slice-128
280     * primary constructor with empty {@code groupingElements}.
281     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns, List<OutputColumn> returningColumns,
        List<ColumnRef> filterColumnRefs, List<ColumnRef> joinColumnRefs,
        List<ColumnRef> groupByColumnRefs, List<ColumnRef> havingColumnRefs,
        List<ColumnRef> orderByColumnRefs, List<ColumnRef> distinctOnColumnRefs,
        List<ColumnRef> qualifyColumnRefs,
        boolean distinct, SetOperator setOperator, RowLimit rowLimit,
        TargetRelation target) {
    // Forward to the 17-arg overload; this shape has no structured GROUP BY
    // slot, so an empty list stands in for groupingElements.
    this(name,
            kind,
            relations,
            outputColumns,
            returningColumns,
            filterColumnRefs,
            joinColumnRefs,
            groupByColumnRefs,
            havingColumnRefs,
            orderByColumnRefs,
            distinctOnColumnRefs,
            qualifyColumnRefs,
            Collections.<GroupingElement>emptyList(), // groupingElements
            distinct,
            setOperator,
            rowLimit,
            target);
}
305
306    /**
307     * Slice 85 constructor preserved so production code that predates
308     * slice 125 keeps compiling unchanged. Delegates to the slice-125
309     * primary constructor with empty {@code qualifyColumnRefs}. QUALIFY
310     * is a SELECT-only clause, so DML / predicate-body / set-op-outer
311     * call sites correctly default to empty.
312     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns, List<OutputColumn> returningColumns,
        List<ColumnRef> filterColumnRefs, List<ColumnRef> joinColumnRefs,
        List<ColumnRef> groupByColumnRefs, List<ColumnRef> havingColumnRefs,
        List<ColumnRef> orderByColumnRefs, List<ColumnRef> distinctOnColumnRefs,
        boolean distinct, SetOperator setOperator, RowLimit rowLimit,
        TargetRelation target) {
    // Forward to the 16-arg overload; this shape has no QUALIFY slot, so an
    // empty list stands in for qualifyColumnRefs.
    this(name,
            kind,
            relations,
            outputColumns,
            returningColumns,
            filterColumnRefs,
            joinColumnRefs,
            groupByColumnRefs,
            havingColumnRefs,
            orderByColumnRefs,
            distinctOnColumnRefs,
            Collections.<ColumnRef>emptyList(),       // qualifyColumnRefs
            distinct,
            setOperator,
            rowLimit,
            target);
}
335
336    /**
337     * Slice 78 constructor preserved so production code that predates
338     * slice 85 keeps compiling unchanged. Delegates to the slice-85
339     * constructor with empty {@code returningColumns}.
340     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns,
        List<ColumnRef> filterColumnRefs, List<ColumnRef> joinColumnRefs,
        List<ColumnRef> groupByColumnRefs, List<ColumnRef> havingColumnRefs,
        List<ColumnRef> orderByColumnRefs, List<ColumnRef> distinctOnColumnRefs,
        boolean distinct, SetOperator setOperator, RowLimit rowLimit,
        TargetRelation target) {
    // Forward to the 15-arg overload; this shape has no RETURNING slot, so an
    // empty list stands in for returningColumns.
    this(name,
            kind,
            relations,
            outputColumns,
            Collections.<OutputColumn>emptyList(),    // returningColumns
            filterColumnRefs,
            joinColumnRefs,
            groupByColumnRefs,
            havingColumnRefs,
            orderByColumnRefs,
            distinctOnColumnRefs,
            distinct,
            setOperator,
            rowLimit,
            target);
}
362
363    /**
364     * Slice 73 constructor preserved so SELECT-kind production code that
365     * predates slice 78 keeps compiling unchanged. Delegates to the
366     * slice-78 constructor with {@code target=null}.
367     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns,
        List<ColumnRef> filterColumnRefs, List<ColumnRef> joinColumnRefs,
        List<ColumnRef> groupByColumnRefs, List<ColumnRef> havingColumnRefs,
        List<ColumnRef> orderByColumnRefs, List<ColumnRef> distinctOnColumnRefs,
        boolean distinct, SetOperator setOperator, RowLimit rowLimit) {
    // Forward to the 14-arg overload whose eleventh parameter is the boolean
    // distinct flag; this shape has no target slot, so null is passed for it.
    this(name,
            kind,
            relations,
            outputColumns,
            filterColumnRefs,
            joinColumnRefs,
            groupByColumnRefs,
            havingColumnRefs,
            orderByColumnRefs,
            distinctOnColumnRefs,
            distinct,
            setOperator,
            rowLimit,
            null);                                    // target
}
387
388    /**
389     * Slice 125 SELECT constructor — the slice-73 SELECT shape plus the
390     * {@code qualifyColumnRefs} slot. Used by the shared SELECT builder
391     * ({@code SemanticIRBuilder.buildSelectStatementImpl}) so a SELECT
392     * carrying a QUALIFY clause can pass its resolved filter refs.
393     * Delegates to the slice-125 primary constructor with empty
394     * {@code returningColumns} and {@code target=null}. The parameter
395     * list differs from the slice-78 14-arg constructor by type
396     * (a {@code List} {@code qualifyColumnRefs} at position 11 vs a
397     * {@code boolean distinct} there), so overload resolution is
398     * unambiguous.
399     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns,
        List<ColumnRef> filterColumnRefs, List<ColumnRef> joinColumnRefs,
        List<ColumnRef> groupByColumnRefs, List<ColumnRef> havingColumnRefs,
        List<ColumnRef> orderByColumnRefs, List<ColumnRef> distinctOnColumnRefs,
        List<ColumnRef> qualifyColumnRefs,
        boolean distinct, SetOperator setOperator, RowLimit rowLimit) {
    // Forward to the 16-arg overload; this SELECT shape has neither a
    // RETURNING list nor a target, so those two slots get empty / null.
    this(name,
            kind,
            relations,
            outputColumns,
            Collections.<OutputColumn>emptyList(),    // returningColumns
            filterColumnRefs,
            joinColumnRefs,
            groupByColumnRefs,
            havingColumnRefs,
            orderByColumnRefs,
            distinctOnColumnRefs,
            qualifyColumnRefs,
            distinct,
            setOperator,
            rowLimit,
            null);                                    // target
}
422
423    /**
424     * Slice 128 SELECT constructor — the slice-125 SELECT shape plus the
425     * {@code groupingElements} slot, the structured GROUP BY view. Used by
426     * the shared SELECT builder ({@code SemanticIRBuilder}) so a SELECT
427     * with a GROUP BY can pass its structured grouping elements. Delegates
428     * to the slice-128 primary with empty {@code returningColumns} and
429     * {@code target=null}. The parameter list differs from the slice-125
430     * 14-arg SELECT constructor by the extra {@code List groupingElements}
431     * arg, so overload resolution is unambiguous.
432     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns,
        List<ColumnRef> filterColumnRefs, List<ColumnRef> joinColumnRefs,
        List<ColumnRef> groupByColumnRefs, List<ColumnRef> havingColumnRefs,
        List<ColumnRef> orderByColumnRefs, List<ColumnRef> distinctOnColumnRefs,
        List<ColumnRef> qualifyColumnRefs,
        List<GroupingElement> groupingElements,
        boolean distinct, SetOperator setOperator, RowLimit rowLimit) {
    // Forward to the 17-arg overload; this SELECT shape has neither a
    // RETURNING list nor a target, so those two slots get empty / null.
    this(name,
            kind,
            relations,
            outputColumns,
            Collections.<OutputColumn>emptyList(),    // returningColumns
            filterColumnRefs,
            joinColumnRefs,
            groupByColumnRefs,
            havingColumnRefs,
            orderByColumnRefs,
            distinctOnColumnRefs,
            qualifyColumnRefs,
            groupingElements,
            distinct,
            setOperator,
            rowLimit,
            null);                                    // target
}
457
458    /**
459     * Slice 129 PIVOT SELECT constructor — a {@code SELECT} whose FROM
460     * source is a {@code PIVOT}. Carries {@code relations} (the underlying
461     * pivot source), {@code outputColumns} (one per projected column), and
462     * {@code pivotColumnRefs} (the consumed FOR + aggregation-arg columns).
463     * Every other slot defaults empty / null: a slice-129 PIVOT skeleton has
464     * no filter / join / group-by / having / order-by / distinct-on /
465     * qualify / grouping / returning refs, is not {@code DISTINCT}, is not a
466     * set-op, has no row-limit, and writes no target. The five-arg shape is
467     * unambiguous against every other constructor.
468     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns,
        List<ColumnRef> pivotColumnRefs) {
    // Forward to the 18-arg overload. Only the pivot refs are supplied; every
    // clause slot is empty and every scalar slot is false / null.
    this(name,
            kind,
            relations,
            outputColumns,
            Collections.<OutputColumn>emptyList(),    // returningColumns
            Collections.<ColumnRef>emptyList(),       // filterColumnRefs
            Collections.<ColumnRef>emptyList(),       // joinColumnRefs
            Collections.<ColumnRef>emptyList(),       // groupByColumnRefs
            Collections.<ColumnRef>emptyList(),       // havingColumnRefs
            Collections.<ColumnRef>emptyList(),       // orderByColumnRefs
            Collections.<ColumnRef>emptyList(),       // distinctOnColumnRefs
            Collections.<ColumnRef>emptyList(),       // qualifyColumnRefs
            Collections.<GroupingElement>emptyList(), // groupingElements
            pivotColumnRefs,
            false,                                    // distinct
            null,                                     // setOperator
            null,                                     // rowLimit
            null);                                    // target
}
488
489    /**
490     * Slice 156 PIVOT SELECT constructor — the slice-129 PIVOT shape plus a
491     * {@code filterColumnRefs} slot for an admitted passthrough-only
492     * {@code WHERE} clause over a PIVOT (every WHERE ref resolves to a provable
493     * passthrough source column). All other clause slots default empty / null,
494     * exactly as the five-arg PIVOT constructor — so an admitted PIVOT without a
495     * WHERE (empty {@code filterColumnRefs}) is byte-identical to the five-arg
496     * form. The six-arg shape (two {@code List<ColumnRef>} trailing args) is
497     * unambiguous against every other constructor by arity.
498     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns,
        List<ColumnRef> filterColumnRefs,
        List<ColumnRef> pivotColumnRefs) {
    // Forward to the 18-arg overload. Only the WHERE refs and pivot refs are
    // supplied; every other clause slot is empty and scalars are false / null.
    this(name,
            kind,
            relations,
            outputColumns,
            Collections.<OutputColumn>emptyList(),    // returningColumns
            filterColumnRefs,
            Collections.<ColumnRef>emptyList(),       // joinColumnRefs
            Collections.<ColumnRef>emptyList(),       // groupByColumnRefs
            Collections.<ColumnRef>emptyList(),       // havingColumnRefs
            Collections.<ColumnRef>emptyList(),       // orderByColumnRefs
            Collections.<ColumnRef>emptyList(),       // distinctOnColumnRefs
            Collections.<ColumnRef>emptyList(),       // qualifyColumnRefs
            Collections.<GroupingElement>emptyList(), // groupingElements
            pivotColumnRefs,
            false,                                    // distinct
            null,                                     // setOperator
            null,                                     // rowLimit
            null);                                    // target
}
519
520    /**
521     * Slice 158 PIVOT SELECT constructor — the slice-156 PIVOT shape plus a
522     * {@code groupByColumnRefs} slot for an admitted passthrough-only
523     * {@code GROUP BY} clause over a PIVOT/UNPIVOT (every GROUP BY ref resolves
524     * to a provable passthrough source column). All other clause slots default
525     * empty / null, exactly as the six-arg PIVOT constructor — so an admitted
526     * PIVOT without a GROUP BY (empty {@code groupByColumnRefs}) is byte-identical
527     * to the six-arg form. The seven-arg shape (three {@code List<ColumnRef>}
528     * trailing args) is unambiguous against every other constructor by arity.
529     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns,
        List<ColumnRef> filterColumnRefs,
        List<ColumnRef> groupByColumnRefs,
        List<ColumnRef> pivotColumnRefs) {
    // Forward to the 18-arg overload. Only the WHERE, GROUP BY and pivot refs
    // are supplied; every other clause slot is empty and scalars are false / null.
    this(name,
            kind,
            relations,
            outputColumns,
            Collections.<OutputColumn>emptyList(),    // returningColumns
            filterColumnRefs,
            Collections.<ColumnRef>emptyList(),       // joinColumnRefs
            groupByColumnRefs,
            Collections.<ColumnRef>emptyList(),       // havingColumnRefs
            Collections.<ColumnRef>emptyList(),       // orderByColumnRefs
            Collections.<ColumnRef>emptyList(),       // distinctOnColumnRefs
            Collections.<ColumnRef>emptyList(),       // qualifyColumnRefs
            Collections.<GroupingElement>emptyList(), // groupingElements
            pivotColumnRefs,
            false,                                    // distinct
            null,                                     // setOperator
            null,                                     // rowLimit
            null);                                    // target
}
551
552    /**
553     * Slice 159 PIVOT SELECT constructor — the slice-158 PIVOT shape plus a
554     * {@code qualifyColumnRefs} slot for an admitted passthrough-only
555     * {@code QUALIFY} clause over a PIVOT/UNPIVOT (every QUALIFY ref resolves to a
556     * provable passthrough source column). All other clause slots default empty /
557     * null, exactly as the seven-arg PIVOT constructor — so an admitted PIVOT
558     * without a QUALIFY (empty {@code qualifyColumnRefs}) is byte-identical to the
559     * seven-arg form. The eight-arg shape (four {@code List<ColumnRef>} trailing
560     * args) is unambiguous against every other constructor by arity.
561     */
public StatementGraph(
        String name, String kind,
        List<RelationSource> relations,
        List<OutputColumn> outputColumns,
        List<ColumnRef> filterColumnRefs,
        List<ColumnRef> groupByColumnRefs,
        List<ColumnRef> qualifyColumnRefs,
        List<ColumnRef> pivotColumnRefs) {
    // Forward to the 18-arg overload. Only the WHERE, GROUP BY, QUALIFY and
    // pivot refs are supplied; every other clause slot is empty and scalars
    // are false / null.
    this(name,
            kind,
            relations,
            outputColumns,
            Collections.<OutputColumn>emptyList(),    // returningColumns
            filterColumnRefs,
            Collections.<ColumnRef>emptyList(),       // joinColumnRefs
            groupByColumnRefs,
            Collections.<ColumnRef>emptyList(),       // havingColumnRefs
            Collections.<ColumnRef>emptyList(),       // orderByColumnRefs
            Collections.<ColumnRef>emptyList(),       // distinctOnColumnRefs
            qualifyColumnRefs,
            Collections.<GroupingElement>emptyList(), // groupingElements
            pivotColumnRefs,
            false,                                    // distinct
            null,                                     // setOperator
            null,                                     // rowLimit
            null);                                    // target
}
584
585    /**
586     * Slice 160 PIVOT SELECT constructor — the slice-159 PIVOT shape plus a
587     * {@code havingColumnRefs} slot for an admitted passthrough-only
588     * {@code HAVING} clause over a PIVOT/UNPIVOT (every HAVING ref resolves to a
589     * provable passthrough source column). All other clause slots default empty /
590     * null, exactly as the eight-arg PIVOT constructor — so an admitted PIVOT
591     * without a HAVING (empty {@code havingColumnRefs}) is byte-identical to the
592     * eight-arg form. The nine-arg shape (five {@code List<ColumnRef>} trailing
593     * args) is unambiguous against every other constructor by arity. The trailing
594     * args follow the natural SQL clause order: filter (WHERE), groupBy, having,
595     * qualify, then the consumed pivot refs.
596     */
597    public StatementGraph(String name,
598                          String kind,
599                          List<RelationSource> relations,
600                          List<OutputColumn> outputColumns,
601                          List<ColumnRef> filterColumnRefs,
602                          List<ColumnRef> groupByColumnRefs,
603                          List<ColumnRef> havingColumnRefs,
604                          List<ColumnRef> qualifyColumnRefs,
605                          List<ColumnRef> pivotColumnRefs) {
606        this(name, kind, relations, outputColumns,
607                Collections.<OutputColumn>emptyList(),
608                filterColumnRefs,
609                Collections.<ColumnRef>emptyList(),
610                groupByColumnRefs,
611                havingColumnRefs,
612                Collections.<ColumnRef>emptyList(),
613                Collections.<ColumnRef>emptyList(),
614                qualifyColumnRefs,
615                Collections.<GroupingElement>emptyList(),
616                pivotColumnRefs,
617                /*distinct=*/ false, /*setOperator=*/ null,
618                /*rowLimit=*/ null, /*target=*/ null);
619    }
620
621    /**
622     * Slice 161 PIVOT SELECT constructor — the slice-160 PIVOT shape plus an
623     * {@code orderByColumnRefs} slot for a passthrough-only {@code ORDER BY}
624     * clause over a PIVOT/UNPIVOT (every sort key resolves to a provable
625     * passthrough source column). UNLIKE the WHERE/GROUP BY/HAVING/QUALIFY
626     * slots, an ORDER BY over a pivot was already admitted (slice 142,
627     * lineage-neutral), so this slot only refines the lineage and is empty when
628     * no passthrough sort key can be proven. All other clause slots default
629     * empty / null, exactly as the nine-arg PIVOT constructor — so an admitted
630     * PIVOT without a passthrough ORDER BY (empty {@code orderByColumnRefs}) is
631     * byte-identical to the nine-arg form. The ten-arg shape (six
632     * {@code List<ColumnRef>} trailing args) is unambiguous against every other
633     * constructor by arity. The trailing args follow the natural SQL clause
634     * order: filter (WHERE), groupBy, having, qualify, orderBy, then the
635     * consumed pivot refs.
636     */
637    public StatementGraph(String name,
638                          String kind,
639                          List<RelationSource> relations,
640                          List<OutputColumn> outputColumns,
641                          List<ColumnRef> filterColumnRefs,
642                          List<ColumnRef> groupByColumnRefs,
643                          List<ColumnRef> havingColumnRefs,
644                          List<ColumnRef> qualifyColumnRefs,
645                          List<ColumnRef> orderByColumnRefs,
646                          List<ColumnRef> pivotColumnRefs) {
647        this(name, kind, relations, outputColumns,
648                Collections.<OutputColumn>emptyList(),
649                filterColumnRefs,
650                Collections.<ColumnRef>emptyList(),
651                groupByColumnRefs,
652                havingColumnRefs,
653                orderByColumnRefs,
654                Collections.<ColumnRef>emptyList(),
655                qualifyColumnRefs,
656                Collections.<GroupingElement>emptyList(),
657                pivotColumnRefs,
658                /*distinct=*/ false, /*setOperator=*/ null,
659                /*rowLimit=*/ null, /*target=*/ null);
660    }
661
662    /**
663     * Pre-slice-73 constructor preserved so hand-built test fixtures
664     * (e.g. {@code SemanticIRProjectorBodyIndexesTest}) continue to
665     * compile without touching every call site. Delegates to the
666     * slice-73 constructor with an empty {@code distinctOnColumnRefs}
667     * list. New production code should call the slice-78 primary
668     * constructor directly.
669     */
670    public StatementGraph(String name,
671                          String kind,
672                          List<RelationSource> relations,
673                          List<OutputColumn> outputColumns,
674                          List<ColumnRef> filterColumnRefs,
675                          List<ColumnRef> joinColumnRefs,
676                          List<ColumnRef> groupByColumnRefs,
677                          List<ColumnRef> havingColumnRefs,
678                          List<ColumnRef> orderByColumnRefs,
679                          boolean distinct,
680                          SetOperator setOperator,
681                          RowLimit rowLimit) {
682        this(name, kind, relations, outputColumns,
683                filterColumnRefs, joinColumnRefs, groupByColumnRefs,
684                havingColumnRefs, orderByColumnRefs,
685                Collections.<ColumnRef>emptyList(),
686                distinct, setOperator, rowLimit);
687    }
688
689    /** Nullable: name for a CTE body or FROM-subquery alias, else null. */
690    public String getName() {
691        return name;
692    }
693
694    public String getKind() {
695        return kind;
696    }
697
698    public List<RelationSource> getRelations() {
699        return relations;
700    }
701
702    public List<OutputColumn> getOutputColumns() {
703        return outputColumns;
704    }
705
706    /**
707     * Slice 85 — RETURNING / OUTPUT projection columns for INSERT / UPDATE /
708     * DELETE statements. Empty list on every SELECT-kind statement (CTE
709     * body / FROM-subquery / scalar / set-op branch / outer), on every
710     * DML statement that did not supply a RETURNING (PG / Oracle) or
711     * OUTPUT (SQL Server) clause, and on CTAS / CREATE VIEW statements.
712     *
713     * <p>For PG / Oracle RETURNING, each entry's
714     * {@link OutputColumn#getName()} is the explicit alias when present,
715     * else the verbatim bare column spelling.
716     * {@link OutputColumn#getSources()} lists the underlying column refs;
717     * the {@code relationAlias} resolves through the same provider used
718     * for SET RHS / WHERE / JOIN ON, so a joined-UPDATE with
719     * {@code RETURNING t.a, s.x} produces refs against both target and
720     * FROM-side relations.
721     *
722     * <p>For SQL Server OUTPUT pseudo-table refs (INSERTED.col,
723     * DELETED.col), the {@code relationAlias} is preserved as the
724     * uppercase pseudo-table name ({@code "INSERTED"} or
725     * {@code "DELETED"}) so consumers can distinguish post-write from
726     * pre-write row state. Lineage edges still flow to
727     * {@link LineageRef#tableColumn(String, String)} pointing at the
728     * physical target table column — both INSERTED and DELETED ultimately
729     * reference the same physical column; only the temporal phase differs.
730     */
731    public List<OutputColumn> getReturningColumns() {
732        return returningColumns;
733    }
734
735    public List<ColumnRef> getFilterColumnRefs() {
736        return filterColumnRefs;
737    }
738
739    public List<ColumnRef> getJoinColumnRefs() {
740        return joinColumnRefs;
741    }
742
743    public List<ColumnRef> getGroupByColumnRefs() {
744        return groupByColumnRefs;
745    }
746
747    /**
748     * Column references that appear in the {@code HAVING} clause's
749     * predicate. The list is per-statement and per-clause: a HAVING
750     * predicate that names {@code d.id} contributes one entry; a HAVING
751     * predicate inside an aggregate ({@code HAVING SUM(salary) > 1000})
752     * contributes the underlying column ({@code salary}) — the same
753     * convention used for projection-side aggregate arguments
754     * (slice 6 OutputColumn.sources).
755     *
756     * <p>Subqueries in HAVING (scalar, EXISTS, IN-SELECT, ANY/ALL/SOME)
757     * and window functions in HAVING are rejected by the builder rather
758     * than silently captured, because the visitor would descend into
759     * inner scopes and leak refs (mirrors the slice-9 ORDER BY guards).
760     *
761     * <p>HAVING is row-influence semantically (it filters out groups),
762     * but it deliberately does <i>not</i> contribute to the canonical
763     * lineage model (slice 7 / {@code CanonicalLineageEdge}). The
764     * canonical model is a parity contract between IR and dlineage, and
765     * dlineage exposes no per-clause HAVING field — it folds HAVING refs
766     * into aggregate-function fdr/fdd edges. Including HAVING-derived
767     * canonical edges only on the IR side would manufacture
768     * divergence-by-design. The {@code havingColumnRefs} field remains
769     * useful for downstream consumers (SQL Guard, lineage explainers)
770     * that don't depend on the dlineage parity contract.
771     */
772    public List<ColumnRef> getHavingColumnRefs() {
773        return havingColumnRefs;
774    }
775
776    /**
777     * Column references that appear in the {@code ORDER BY} clause's sort
778     * keys. Only physical column references are recorded — ordinal
779     * ({@code ORDER BY 1}) and projection-alias ({@code ORDER BY x})
780     * forms are rejected by the builder, not silently emitted as
781     * {@code []}. Sort direction ({@code ASC}/{@code DESC}) and null
782     * placement ({@code NULLS FIRST}/{@code NULLS LAST}) are presentation
783     * metadata and are not modelled.
784     *
785     * <p>The flag is per-statement: in
786     * {@code WITH x AS (... ORDER BY id) SELECT id FROM x} the inner
787     * statement's {@code orderByColumnRefs} contains {@code id} while the
788     * outer's is empty.
789     */
790    public List<ColumnRef> getOrderByColumnRefs() {
791        return orderByColumnRefs;
792    }
793
794    /**
795     * Whether the statement applies row-deduplication. True for
796     * {@code SELECT DISTINCT}, Oracle's deprecated synonym
797     * {@code SELECT UNIQUE}, AND PostgreSQL / Greenplum
798     * {@code SELECT DISTINCT ON (cols)}; false for {@code SELECT},
799     * {@code SELECT ALL}, and the absence of any row-filter clause.
800     * The flag is per-statement, never per-output.
801     *
802     * <p>For {@code DISTINCT ON (cols)} the partition keys live on
803     * {@link #getDistinctOnColumnRefs()}; the boolean here pins the
804     * semantic invariant that the statement deduplicates rows
805     * regardless of which key shape is used.
806     */
807    public boolean isDistinct() {
808        return distinct;
809    }
810
811    /**
812     * Column references in the {@code DISTINCT ON (cols)} partition list
813     * (PostgreSQL / Greenplum). Empty for plain {@code SELECT DISTINCT},
814     * {@code SELECT UNIQUE}, {@code SELECT ALL}, and the absence of any
815     * row-filter clause.
816     *
817     * <p>Invariant: {@code !distinctOnColumnRefs.isEmpty()} implies
818     * {@link #isDistinct()} == {@code true}. The reverse does not hold
819     * (plain {@code DISTINCT} also returns {@code true}).
820     *
821     * <p>The list collects physical column refs the same way
822     * {@code groupByColumnRefs} does: column refs inside compound
823     * expressions ({@code a + b}, {@code CASE WHEN ...}) and aggregate
824     * arguments ({@code COUNT(x)}) are descended into; subqueries and
825     * window functions in {@code DISTINCT ON} are rejected by the
826     * builder so they cannot leak inner-scope refs.
827     *
828     * <p>Oracle, MySQL, Redshift and other non-PG vendors silently
829     * accept {@code DISTINCT ON (...)} as plain {@code DISTINCT} —
830     * their parser drops the ON expression list, so this slot stays
831     * empty for those vendors regardless of the surface SQL.
832     */
833    public List<ColumnRef> getDistinctOnColumnRefs() {
834        return distinctOnColumnRefs;
835    }
836
837    /**
838     * Column references that appear in the {@code QUALIFY} clause's
839     * predicate (Snowflake / BigQuery / Teradata). QUALIFY filters rows on
840     * window-function results; it is row-influence in the same family as
841     * {@code WHERE} and {@code HAVING}.
842     *
843     * <p>Two surface forms reduce to the SAME set of influencing base
844     * columns:
845     * <ul>
846     *   <li>Inline window form
847     *       ({@code QUALIFY ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) = 1})
848     *       — the window's PARTITION BY / ORDER BY / argument refs are
849     *       collected directly ({@code a}, {@code b}).</li>
850     *   <li>Projection-alias form ({@code QUALIFY rn = 1} where
851     *       {@code rn} aliases a window projection) — the alias resolves to
852     *       the matching {@link OutputColumn}; its influencing columns are
853     *       {@code getSources()} unioned with the
854     *       {@link WindowSpec#getPartitionRefs()} and
855     *       {@link WindowSpec#getOrderRefs()} of its window spec (a window
856     *       {@link OutputColumn#getSources()} is empty on its own).</li>
857     * </ul>
858     *
859     * <p>Subqueries in QUALIFY are rejected by the builder (they would
860     * leak inner-scope refs); window functions are admitted (the whole
861     * point of QUALIFY).
862     *
863     * <p>Like {@code havingColumnRefs}, this slot is row-influence
864     * semantically but deliberately does <i>not</i> contribute to the
865     * canonical lineage model (slice 7 / {@code CanonicalLineageEdge}).
866     * The canonical model is a parity contract with dlineage, which
867     * exposes no per-clause QUALIFY field; emitting QUALIFY-derived
868     * canonical edges only on the IR side would manufacture
869     * divergence-by-design.
870     */
871    public List<ColumnRef> getQualifyColumnRefs() {
872        return qualifyColumnRefs;
873    }
874
875    /**
876     * Structured per-top-level-element view of the {@code GROUP BY} (slice
877     * 128): one {@link GroupingElement} per top-level grouping item in
878     * document order, each tagged {@code SIMPLE} / {@code ROLLUP} /
879     * {@code CUBE} / {@code GROUPING_SETS} with its flattened member
880     * columns. Empty iff the statement has no {@code GROUP BY} items; a
881     * plain {@code GROUP BY a, b} yields {@code [SIMPLE: a, SIMPLE: b]}.
882     *
883     * <p>This is additive to {@link #getGroupByColumnRefs()}, which still
884     * returns the flat, deduplicated, document-order union of every
885     * grouping column (slice 127) regardless of grouping structure.
886     * {@code groupingElements} preserves the structure the flat list
887     * discards — relevant for governance (a {@code ROLLUP}/{@code CUBE}
888     * changes output cardinality and produces super-aggregate rows) and
889     * for downstream OpenLineage / DataHub consumers.
890     */
891    public List<GroupingElement> getGroupingElements() {
892        return groupingElements;
893    }
894
895    /**
896     * Slice 129 — columns CONSUMED by a {@code PIVOT} operator, in document
897     * order: the {@code FOR} / pivot column(s) first, then the
898     * aggregation-function argument column(s). All resolve to the underlying
899     * pivot source relation ({@link #getRelations()}). Empty on every
900     * non-PIVOT statement.
901     *
902     * <p>These are the input columns a PIVOT reads; the IN-list values
903     * become the new output column names (synthesised in a later sub-slice).
904     * Function names, literals, and column-alias nodes (e.g. the Oracle
905     * {@code SUM(quantity) AS q}) are not columns and do not appear here.
906     *
907     * <p>Slice 129 (sub-slice a) admits only a PIVOT over a base-table
908     * source with an explicit (non-{@code *}) projection and no other query
909     * clauses; broader shapes (UNPIVOT, subquery source, {@code SELECT *}
910     * expansion, output-column lineage) are deferred to later sub-slices and
911     * rejected with structured {@code PIVOT_*} diagnostics until then.
912     */
913    public List<ColumnRef> getPivotColumnRefs() {
914        return pivotColumnRefs;
915    }
916
917    /**
918     * Set-operation kind for the outer statement of a set-op program
919     * (slice 12). Returns null on every regular SELECT statement and on
920     * every CTE / FROM-subquery / scalar / set-op-branch body. The
921     * {@code _ALL} variants encode {@code TSelectSqlStatement#isAll()};
922     * {@code MINUS} (Oracle / Spark / Hive) and {@code EXCEPT}
923     * (PostgreSQL / SQL Server / standard) are kept distinct because the
924     * parser exposes them as separate
925     * {@link gudusoft.gsqlparser.ESetOperatorType} values, even though
926     * they are semantically equivalent.
927     */
928    public SetOperator getSetOperator() {
929        return setOperator;
930    }
931
932    /**
933     * Per-statement row-limit metadata (slice 70). Returns null when no
934     * row-limit clause was present, or when the row-limit clause is in
935     * slice-71 / 72 territory (TOP, standalone OFFSET, PG inline
936     * {@code LIMIT N OFFSET M}, MySQL inline {@code LIMIT M, N},
937     * set-op outer row-limit) — those surfaces continue to be rejected
938     * by the builder with their existing diagnostic codes.
939     *
940     * <p>When non-null, the {@link RowLimit#getKind()} captures which
941     * surface SQL form was used ({@code LIMIT} vs {@code FETCH FIRST})
942     * and {@link RowLimit#getCount()} captures the verbatim count text.
943     *
944     * <p>Row-limit metadata does <i>not</i> change column lineage. The
945     * canonical lineage model (slice 7 / {@code CanonicalLineageEdge})
946     * deliberately ignores it: row-limit is presentation-time pruning,
947     * not a column-flow influence. ORDER BY refs, output sources,
948     * filter / join / group-by / having refs are all unaffected.
949     */
950    public RowLimit getRowLimit() {
951        return rowLimit;
952    }
953
954    /**
955     * Slice 78 — write-side target for INSERT statements. Non-null only on
956     * {@code "INSERT"}-kind statements; null on every {@code "SELECT"}-kind
957     * statement (whether the SELECT is an outer, CTE body, FROM-subquery
958     * body, scalar-subquery body, or set-op branch).
959     *
960     * <p>When non-null, {@link TargetRelation#getBinding()} is the target
961     * table (kind = {@link RelationKind#TABLE}) and
962     * {@link TargetRelation#getColumns()} holds the verbatim SQL column-list
963     * spellings (empty list when the SQL author omitted the column list).
964     *
965     * <p>Cross-statement {@link LineageEdge}s for INSERT use
966     * {@link LineageRef#tableColumn(String, String)} as the {@code from}
967     * endpoint (target_table, target_col) and
968     * {@link LineageRef#statementOutput(int, String)} as the {@code to}
969     * endpoint (source SELECT body statement index + output name).
970     */
971    public TargetRelation getTarget() {
972        return target;
973    }
974
975    /**
976     * Join-analysis facts for this query block (slice 167, GAP 1/2/4):
977     * the structured {@link JoinGraph}, WHERE filter predicates, and
978     * query-block scope. Never null ({@link JoinAnalysisFacts#EMPTY} when
979     * not populated).
980     */
981    public JoinAnalysisFacts getJoinAnalysisFacts() {
982        return joinAnalysisFacts;
983    }
984
985    /**
986     * Convenience accessor for the structured {@link JoinGraph} (slice
987     * 167). Never null ({@link JoinGraph#EMPTY} when this block has no
988     * modelled joins).
989     */
990    public JoinGraph getJoinGraph() {
991        return joinAnalysisFacts.getJoinGraph();
992    }
993
994    /**
995     * Optional block-level source span (slice 179, R5) covering this
996     * statement's own text, or {@code null} when not set. The same span is
997     * surfaced on {@link QueryBlockScope#getSourceSpan()}.
998     */
999    public SourceSpan getSourceSpan() {
1000        return sourceSpan;
1001    }
1002}