001package gudusoft.gsqlparser.ir.semantic.builder;
002
003import gudusoft.gsqlparser.EBoundaryType;
004import gudusoft.gsqlparser.EDbObjectType;
005import gudusoft.gsqlparser.EDbVendor;
006import gudusoft.gsqlparser.EExpressionType;
007import gudusoft.gsqlparser.EJoinType;
008import gudusoft.gsqlparser.ELimitRowType;
009import gudusoft.gsqlparser.EPseudoTableType;
010import gudusoft.gsqlparser.ESetOperatorType;
011import gudusoft.gsqlparser.EUniqueRowFilterType;
012import gudusoft.gsqlparser.ir.semantic.ColumnRef;
013import gudusoft.gsqlparser.ir.semantic.Diagnostic;
014import gudusoft.gsqlparser.ir.semantic.DiagnosticCode;
015import gudusoft.gsqlparser.ir.semantic.FrameBound;
016import gudusoft.gsqlparser.nodes.TParseTreeNode;
017import gudusoft.gsqlparser.ir.semantic.LineageEdge;
018import gudusoft.gsqlparser.ir.semantic.LineageRef;
019import gudusoft.gsqlparser.ir.semantic.OutputColumn;
020import gudusoft.gsqlparser.ir.semantic.RelationKind;
021import gudusoft.gsqlparser.ir.semantic.RelationSource;
022import gudusoft.gsqlparser.ir.semantic.RowLimit;
023import gudusoft.gsqlparser.ir.semantic.RowLimitKind;
024import gudusoft.gsqlparser.ir.semantic.SemanticProgram;
025import gudusoft.gsqlparser.ir.semantic.SetOperator;
026import gudusoft.gsqlparser.ir.semantic.StatementGraph;
027import gudusoft.gsqlparser.ir.semantic.TargetRelation;
028import gudusoft.gsqlparser.ir.semantic.WindowFrame;
029import gudusoft.gsqlparser.ir.semantic.WindowSpec;
030import gudusoft.gsqlparser.ir.semantic.binding.ColumnBinding;
031import gudusoft.gsqlparser.ir.semantic.binding.FromSubqueryNaming;
032import gudusoft.gsqlparser.ir.semantic.binding.NameBindingProvider;
033import gudusoft.gsqlparser.ir.semantic.binding.RelationBinding;
034import gudusoft.gsqlparser.ir.semantic.binding.UsingScope;
035import gudusoft.gsqlparser.nodes.TCTE;
036import gudusoft.gsqlparser.nodes.TCTEList;
037import gudusoft.gsqlparser.nodes.TExpression;
038import gudusoft.gsqlparser.nodes.TExpressionList;
039import gudusoft.gsqlparser.nodes.TFetchFirstClause;
040import gudusoft.gsqlparser.nodes.TFunctionCall;
041import gudusoft.gsqlparser.nodes.TLimitClause;
042import gudusoft.gsqlparser.nodes.TOffsetClause;
043import gudusoft.gsqlparser.nodes.TTopClause;
044import gudusoft.gsqlparser.nodes.TGroupBy;
045import gudusoft.gsqlparser.nodes.TGroupByItem;
046import gudusoft.gsqlparser.nodes.TGroupByItemList;
047import gudusoft.gsqlparser.nodes.TJoin;
048import gudusoft.gsqlparser.nodes.TJoinItem;
049import gudusoft.gsqlparser.nodes.TJoinItemList;
050import gudusoft.gsqlparser.nodes.TJoinList;
051import gudusoft.gsqlparser.nodes.TObjectName;
052import gudusoft.gsqlparser.nodes.TObjectNameList;
053import gudusoft.gsqlparser.nodes.TOrderBy;
054import gudusoft.gsqlparser.nodes.TOutputClause;
055import gudusoft.gsqlparser.nodes.TReturningClause;
056import gudusoft.gsqlparser.nodes.TOrderByItem;
057import gudusoft.gsqlparser.nodes.TOrderByItemList;
058import gudusoft.gsqlparser.nodes.TParseTreeVisitor;
059import gudusoft.gsqlparser.nodes.TPartitionClause;
060import gudusoft.gsqlparser.nodes.TResultColumn;
061import gudusoft.gsqlparser.nodes.TResultColumnList;
062import gudusoft.gsqlparser.nodes.TSelectDistinct;
063import gudusoft.gsqlparser.nodes.TTable;
064import gudusoft.gsqlparser.nodes.TWhereClause;
065import gudusoft.gsqlparser.nodes.TWindowDef;
066import gudusoft.gsqlparser.nodes.TWithinGroup;
067import gudusoft.gsqlparser.nodes.TWindowFrame;
068import gudusoft.gsqlparser.nodes.TWindowFrameBoundary;
069import gudusoft.gsqlparser.resolver2.ResolutionStatus;
070import gudusoft.gsqlparser.EInsertSource;
071import gudusoft.gsqlparser.nodes.TColumnDefinition;
072import gudusoft.gsqlparser.nodes.TColumnDefinitionList;
073import gudusoft.gsqlparser.nodes.TViewAliasClause;
074import gudusoft.gsqlparser.nodes.TViewAliasItem;
075import gudusoft.gsqlparser.nodes.TViewAliasItemList;
076import gudusoft.gsqlparser.stmt.TCreateTableSqlStatement;
077import gudusoft.gsqlparser.stmt.TCreateViewSqlStatement;
078import gudusoft.gsqlparser.stmt.TDeleteSqlStatement;
079import gudusoft.gsqlparser.stmt.TInsertSqlStatement;
080import gudusoft.gsqlparser.stmt.TMergeSqlStatement;
081import gudusoft.gsqlparser.stmt.TSelectSqlStatement;
082import gudusoft.gsqlparser.stmt.TUpdateSqlStatement;
083import gudusoft.gsqlparser.nodes.TMergeWhenClause;
084import gudusoft.gsqlparser.nodes.TMergeUpdateClause;
085import gudusoft.gsqlparser.nodes.TMergeInsertClause;
086
087import java.util.ArrayDeque;
088import java.util.ArrayList;
089import java.util.Collections;
090import java.util.Deque;
091import java.util.EnumSet;
092import java.util.HashMap;
093import java.util.HashSet;
094import java.util.IdentityHashMap;
095import java.util.LinkedHashMap;
096import java.util.LinkedHashSet;
097import java.util.List;
098import java.util.Locale;
099import java.util.Map;
100import java.util.Set;
101
102/**
103 * Builds a {@link SemanticProgram} from a parsed and resolved
104 * {@link TSelectSqlStatement}.
105 *
106 * <p>Current scope (after slice 9): SELECT with one or more base-table or
107 * CTE sources, optional WHERE, optional JOIN of base tables with ON
108 * conditions, optional GROUP BY (slice 6), optional WITH clause including
109 * chained CTEs (each CTE sees the ones declared strictly before it),
110 * optional FROM-clause subquery (slice 5), optional row-deduplication via
111 * {@code SELECT DISTINCT} or Oracle's {@code SELECT UNIQUE} synonym
112 * (slice 8 — see {@link StatementGraph#isDistinct()}), optional ORDER BY
113 * over physical column references or column-bearing expressions
114 * (slice 9 — see {@link StatementGraph#getOrderByColumnRefs()}).
115 * Expression projections like {@code salary * 2 AS doubled} or
116 * {@code a.x + a.y} are accepted and marked
117 * {@link OutputColumn#isDerived()}; aggregate function calls (slice 6)
118 * are flagged via {@link OutputColumn#isAggregate()}.
119 *
120 * <p>Slice 9 lifts {@code ORDER BY} for sort keys that are physical
121 * column references or expressions over them. The collected references
122 * surface as {@link StatementGraph#getOrderByColumnRefs()}. Sort
123 * direction ({@code ASC}/{@code DESC}) and null placement
124 * ({@code NULLS FIRST}/{@code NULLS LAST}) are presentation metadata
125 * and are not modelled. Ordinal forms ({@code ORDER BY 1}) and
126 * projection-alias forms ({@code SELECT id AS x ... ORDER BY x}) are
127 * rejected so the dependency information is never silently lost; a
128 * later slice can model output-position references explicitly. The
129 * canonical lineage model (slice 7) deliberately ignores ORDER BY —
130 * sort order changes presentation, not column dependency or row-set
131 * membership.
132 *
133 * <p>Row-limit clauses ({@code LIMIT}, {@code TOP}, {@code OFFSET},
134 * {@code FETCH FIRST}) are rejected statement-wide, including the
135 * SQL Server-style {@code ORDER BY ... OFFSET ... FETCH NEXT}. With
136 * a row-limit present, {@code ORDER BY} ceases to be presentation-only
137 * and starts deciding which rows survive — the canonical-model
138 * exclusion would no longer be sound, so the entire statement is out
139 * of scope until a future slice models row-limit semantics.
140 *
141 * <p>Slice 10 lifts {@code HAVING}: the predicate's column references
142 * are collected into {@link StatementGraph#getHavingColumnRefs()} via
143 * {@link #buildHavingColumnRefs}. The same visitor pattern as projection
144 * and ORDER BY rejects subqueries (scalar, EXISTS, IN-SELECT, ANY/ALL/
145 * SOME) and window functions before {@link #collectColumnRefs} runs, so
146 * inner-scope refs never leak. HAVING without GROUP BY is supported (the
147 * parser still attaches a {@link TGroupBy} node with empty items).
148 * HAVING is row-influence semantically but does not contribute to the
149 * canonical lineage model — see
150 * {@link StatementGraph#getHavingColumnRefs()} for why.
151 *
152 * <p>Slice 11 lifted uncorrelated scalar subqueries in projection;
153 * scalar bodies are extracted as their own statements via
154 * {@link #extractScalarSubqueriesAsStatements} with the synthetic-name
155 * convention {@code <scalar_subquery_<index>>}.
156 *
157 * <p>Slice 12 lifts set operations (UNION / UNION ALL / INTERSECT /
158 * INTERSECT ALL / MINUS / MINUS ALL / EXCEPT / EXCEPT ALL) at the top
159 * level and as CTE bodies. Each branch becomes its own
160 * {@link StatementGraph} with synthetic name
161 * {@code <set_op_branch_<index>>}; the outer set-op statement carries
162 * empty {@code relations} and lineage edges fan out per-position to
163 * each branch. The flatten descends the left-leaning AST iteratively
164 * (per CLAUDE.md — no recursion on {@code leftStmt}/{@code rightStmt}).
165 * See {@link #buildSetOpProgram}.
166 *
167 * <p>Slice 22 lifts window-function frame clauses
168 * ({@code ROWS}/{@code RANGE}/{@code GROUPS BETWEEN ...}); the frame
169 * unit, start bound, and optional end bound are captured in
170 * {@link WindowFrame} hung off
171 * {@link WindowSpec#getFrame()}. Frame info is presentation-only
172 * (dlineage XML harvests no frame information) and does NOT contribute
173 * to the canonical lineage model — same status as slice-13's
174 * PARTITION BY / OVER ORDER BY refs. Per-bound EXCLUDE clauses
175 * (Netezza-reachable) and non-constant offsets (PG
176 * {@code simple_object_name_t}, ANSI {@code parenthesis_t}) are still
177 * rejected.
178 *
179 * <p>Still rejected: {@code WITH RECURSIVE}, {@code DISTINCT ON (...)}
180 * and other non-{@code DISTINCT}/{@code UNIQUE} row-filters,
181 * scalar-body constant-only projections (zero column refs),
182 * correlated scalar subqueries, scalar bodies with
183 * subqueries in WHERE/JOIN ON/GROUP BY, multi-column scalar inner,
184 * scalar subqueries embedded in larger projection expressions including
185 * EXISTS-in-projection, embedded window functions in larger projection
186 * expressions, window functions in scalar-subquery bodies, window
187 * functions in WHERE/JOIN ON/GROUP BY/HAVING/ORDER BY, empty
188 * {@code OVER ()}, frame clauses with non-constant offsets (PG
189 * {@code simple_object_name_t}, ANSI {@code parenthesis_t}), frame
190 * {@code EXCLUDE} clauses (Netezza-reachable), named windows,
191 * vendor-specific window extensions ({@code FILTER (WHERE ...)},
192 * {@code WITHIN GROUP},
193 * {@code KEEP DENSE_RANK}, Hive {@code DISTRIBUTE BY}/{@code CLUSTER BY}/
194 * {@code SORT BY}/{@code PARTITION BY ... SORT (...)}), non-physical
195 * {@code PARTITION BY} / OVER {@code ORDER BY} refs (literals,
196 * subqueries, function calls, expressions, expression-alias references),
197 * window function names outside the slice-13 allowlist,
198 * (slice 63 lifts explicit {@code CROSS JOIN}, slice 64 lifts
199 * {@code JOIN ... USING (...)}, and slice 66 lifts {@code NATURAL JOIN}
200 * at outer / CTE-body / FROM-subquery-body call sites; all three stay
201 * rejected inside scalar / set-op-branch / set-op-CTE / predicate bodies;
202 * NATURAL additionally requires resolvable catalog metadata on both
203 * sides, with a side-specific reject otherwise), duplicate aliases,
204 * Oracle
205 * {@code ORDER SIBLINGS BY}, Teradata {@code ORDER BY ... RESET WHEN},
206 * row-limit clauses, ORDER BY ordinals/aliases, Teradata {@code QUALIFY}
207 * clause, set operations nested in FROM-subquery / scalar bodies,
208 * mixed-operator and mixed-{@code _ALL} set-op chains, set-op outer
209 * ORDER BY / row-limit clauses, set-op internal-node modifiers, branch
210 * column-count mismatch, set-op branches with FROM-subquery / scalar
211 * projection / their own CTE list, nested WITH on set-op CTE body. The
212 * builder fails fast outside this scope so callers see the unsupported
213 * case immediately rather than receiving a half-built IR.
214 */
215public final class SemanticIRBuilder {
216
217    /**
218     * Reserved name prefix for synthetic scalar-subquery body
219     * statements (slice 11). Names take the form
220     * {@code "<scalar_subquery_<index>>"}; the angle brackets ensure
221     * no collision with real CTE names or FROM-clause aliases.
222     * {@link #isScalarSyntheticName(String)} is the only authorised
223     * detector — both this builder and
224     * {@code SemanticIRProjector.BodyIndexes} use it so the convention
225     * lives in one place.
226     */
227    public static final String SCALAR_BODY_PREFIX = "<scalar_subquery_";
228
229    /**
230     * Strict regex for synthetic scalar-subquery-body names. Format is
231     * exactly {@code <scalar_subquery_<digits>>} — pinning the digits
232     * suffix and the closing angle bracket prevents a real (quoted)
233     * CTE alias that happens to begin with the prefix from being
234     * misclassified as a synthetic name and silently skipped by
235     * {@code BodyIndexes}.
236     */
237    private static final java.util.regex.Pattern SCALAR_NAME_PATTERN =
238            java.util.regex.Pattern.compile("<scalar_subquery_\\d+>");
239
240    /**
241     * True iff {@code name} is a synthetic scalar-subquery-body name
242     * created by this builder (slice 11). Used by
243     * {@code SemanticIRProjector.BodyIndexes} to skip such bodies when
244     * building the CTE/FROM-subquery name lookup tables — scalar
245     * bodies are reached only via lineage edges, never via relations.
246     *
247     * <p>The match is strict: the name must be the full reserved
248     * pattern {@code <scalar_subquery_<digits>>}. A real CTE alias
249     * that happens to start with {@code <scalar_subquery_} but
250     * doesn't match the digits-and-closing-bracket suffix is NOT
251     * skipped (codex impl-review round-1 SHOULD 2).
252     */
253    public static boolean isScalarSyntheticName(String name) {
254        return name != null && SCALAR_NAME_PATTERN.matcher(name).matches();
255    }
256
257    /**
258     * Reserved name prefix for synthetic set-op-branch body statements
259     * (slice 12). Names take the form {@code "<set_op_branch_<index>>"};
260     * the angle brackets ensure no collision with real CTE names or
261     * FROM-clause aliases. {@link #isSetOpBranchSyntheticName(String)} is
262     * the only authorised detector — both this builder and
263     * {@code SemanticIRProjector.BodyIndexes} use it so the convention
264     * lives in one place (slice-11 process lesson #10 generalised).
265     */
266    public static final String SET_OP_BRANCH_PREFIX = "<set_op_branch_";
267
268    /**
269     * Strict regex for synthetic set-op-branch-body names. Format is
270     * exactly {@code <set_op_branch_<digits>>} — pinning the digits
271     * suffix and the closing angle bracket prevents a real (quoted) CTE
272     * alias that happens to begin with the prefix from being
273     * misclassified as a synthetic name and silently skipped by
274     * {@code BodyIndexes}.
275     */
276    private static final java.util.regex.Pattern SET_OP_BRANCH_NAME_PATTERN =
277            java.util.regex.Pattern.compile("^<set_op_branch_\\d+>$");
278
279    /**
280     * True iff {@code name} is a synthetic set-op-branch-body name
281     * created by this builder (slice 12). Used by
282     * {@code SemanticIRProjector.BodyIndexes} to skip such bodies when
283     * building the CTE/FROM-subquery name lookup tables — set-op
284     * branches are reached only via lineage edges, never via relations.
285     *
286     * <p>The match is strict: the name must be the full reserved
287     * pattern {@code <set_op_branch_<digits>>}.
288     */
289    public static boolean isSetOpBranchSyntheticName(String name) {
290        return name != null && SET_OP_BRANCH_NAME_PATTERN.matcher(name).matches();
291    }
292
293    /**
294     * Reserved name prefix for synthetic predicate-subquery body statements
295     * (slice 23 — uncorrelated EXISTS extracted from outer-SELECT JOIN ON).
296     * Names take the form {@code "<predicate_subquery_<index>>"}; the angle
297     * brackets ensure no collision with real CTE names or FROM-clause aliases.
298     * {@link #isPredicateSubquerySyntheticName(String)} is the only authorised
299     * detector — both this builder and {@code SemanticIRProjector.BodyIndexes}
300     * use it so the convention lives in one place.
301     */
302    public static final String PREDICATE_BODY_PREFIX = "<predicate_subquery_";
303
304    /**
305     * Strict regex for synthetic predicate-subquery-body names. Format is
306     * exactly {@code <predicate_subquery_<digits>>}; pinning the digit suffix
307     * and the closing angle bracket prevents a real (quoted) CTE alias that
308     * happens to begin with the prefix from being misclassified as a synthetic
309     * name and silently skipped by {@code BodyIndexes}.
310     */
311    private static final java.util.regex.Pattern PREDICATE_BODY_NAME_PATTERN =
312            java.util.regex.Pattern.compile("<predicate_subquery_\\d+>");
313
314    /**
315     * True iff {@code name} is a synthetic predicate-subquery-body name
316     * created by this builder (slice 23). Used by
317     * {@code SemanticIRProjector.BodyIndexes} to skip such bodies when
318     * building the CTE/FROM-subquery name lookup tables — predicate-subquery
319     * bodies are unreachable from outer (no relation edge, no lineage edge).
320     */
321    public static boolean isPredicateSubquerySyntheticName(String name) {
322        return name != null && PREDICATE_BODY_NAME_PATTERN.matcher(name).matches();
323    }
324
325    /**
326     * Aggregate function names recognized by the builder's per-output
327     * aggregate flag detection (slice 6 originated; slice 29 / slice 30
328     * extended). Treated as case-insensitive. Callers should go through
329     * {@link #isAggregateFunction(TExpression)} rather than reading this
330     * set directly.
331     *
332     * <p>Slice-29 extensions: dialect aggregates {@code listagg},
333     * {@code string_agg}, {@code group_concat}, {@code array_agg}.
334     * Slice-30 extension: {@code mode} (PostgreSQL ordered-set aggregate;
335     * admitted via the slice-29 WITHIN GROUP path under
336     * {@code findUnsupportedWithinGroupFunctionName}). Slice 30 also
337     * removes {@code mode} from {@link #WINDOW_FUNCTION_NAMES} via an
338     * explicit {@code s.remove("mode")} so the slice-13 window allowlist
339     * isn't widened — see {@link #WINDOW_FUNCTION_NAMES} JavaDoc and
340     * {@code DlineageXmlProjector.ORDER_BY_WITHIN_GROUP_AGGREGATE_NAMES} for
341     * the matching window-vs-aggregate discriminator override.
342     */
343    private static final Set<String> AGGREGATE_FUNCTION_NAMES;
344    static {
345        Set<String> s = new HashSet<>();
346        s.add("count");
347        s.add("sum");
348        s.add("avg");
349        s.add("min");
350        s.add("max");
351        s.add("stddev");
352        s.add("variance");
353        s.add("var_samp");
354        s.add("var_pop");
355        s.add("stddev_samp");
356        s.add("stddev_pop");
357        // Common dialect-specific aggregates so the flag has fewer false negatives.
358        s.add("listagg");      // Oracle, PostgreSQL 16+
359        s.add("string_agg");   // PostgreSQL, SQL Server
360        s.add("group_concat"); // MySQL
361        s.add("array_agg");    // PostgreSQL, Snowflake, BigQuery
362        // Slice 30: PostgreSQL ordered-set aggregate. Unlike percentile_cont /
363        // percentile_disc / rank-family, mode() has no documented window form
364        // in any GSP-supported vendor; admitting it lets the WITHIN GROUP path
365        // accept it in JOIN ON predicate subqueries (slice 29 lift extension)
366        // AND lets DlineageXmlProjector mark its output aggregate=true.
367        // Defensive: WINDOW_FUNCTION_NAMES below subtracts mode after
368        // s.addAll(AGGREGATE_FUNCTION_NAMES) so mode() OVER (...) stays
369        // rejected by the slice-13 window allowlist.
370        s.add("mode");         // PostgreSQL ordered-set aggregate (slice 30)
371        AGGREGATE_FUNCTION_NAMES = Collections.unmodifiableSet(s);
372    }
373
374    /**
375     * Slice 42: hypothetical-set ordered-set aggregate function names that
376     * are ALSO valid window functions. Unlike {@link #AGGREGATE_FUNCTION_NAMES}
377     * these names are admitted as aggregates ONLY when the call carries a
378     * {@link #isWithinGroupOnlyWindowDef WITHIN-GROUP-only}
379     * {@link TWindowDef} attachment (Oracle / SQL Server parser style —
380     * {@code RANK(100) WITHIN GROUP (ORDER BY x.id)} produces
381     * {@code fn.getWindowDef()!=null}, {@code wd.isIncludingOverClause()==
382     * false}, {@code wd.getWithinGroup()!=null}). Any other shape — direct
383     * {@code fn.getWithinGroup()} (PG / Snowflake style),
384     * {@code fn.getWindowDef()} with {@code OVER (...)}, or no attachment
385     * at all — keeps the existing window-function classification.
386     *
387     * <p>The set is intentionally NOT merged into
388     * {@link #AGGREGATE_FUNCTION_NAMES} because that would also lift the PG
389     * direct-attachment hypothetical-set form ({@code rank(0.5) WITHIN GROUP
390     * (ORDER BY x.salary)}). Pre-plan probe ({@code /tmp/probe42/Probe42.java})
391     * confirmed PG dlineage XML for hypothetical-set is structurally
392     * indistinguishable from {@code rank() OVER (ORDER BY x)} (both emit
393     * {@code clauseType="orderby"} fdr) — admitting PG hypothetical-set
394     * would manufacture an {@code AGGREGATION_MISMATCH} divergence on the
395     * windowed form because the projector's
396     * {@code DlineageXmlProjector.isWindowFunctionResultset} cannot tell
397     * the two forms apart on PG.
398     *
399     * <p>The Oracle / MSSQL hypothetical-set form, by contrast, emits
400     * neither a {@code clauseType="orderby"} fdr nor a
401     * {@code clauseType="selectList"} fdr (probe-confirmed) — so the
402     * projector's slice-13 windowed-vs-aggregate discriminator returns
403     * {@code false} and the matching projector-side
404     * {@code AGGREGATE_FUNCTION_NAMES} entry marks the output aggregate.
405     * Their OVER form ({@code RANK() OVER (ORDER BY x.id)}) emits
406     * {@code clauseType="orderby"} as expected and stays correctly
407     * classified as windowed.
408     *
409     * <p>Vendor-gated to Oracle / MSSQL inside
410     * {@link #isAdmittedTopLevelWithinGroupAggregate} and
411     * {@link #findUnsupportedWithinGroupFunctionName}; the PG
412     * direct-attachment shape never satisfies the
413     * {@link #isWithinGroupOnlyWindowDef} predicate (because PG sets
414     * {@code fn.getWindowDef()==null}) so the carve-out cannot accidentally
415     * fire on PG.
416     */
417    private static final Set<String> HYPOTHETICAL_SET_AGGREGATE_NAMES;
418    static {
419        Set<String> s = new HashSet<>();
420        s.add("rank");
421        s.add("dense_rank");
422        s.add("percent_rank");
423        s.add("cume_dist");
424        HYPOTHETICAL_SET_AGGREGATE_NAMES = Collections.unmodifiableSet(s);
425    }
426
427    /**
428     * Slice 42: true iff {@code fn} is an Oracle / MSSQL hypothetical-set
429     * ordered-set aggregate call shape — {@code RANK} / {@code DENSE_RANK} /
430     * {@code PERCENT_RANK} / {@code CUME_DIST} with
431     * {@link #isWithinGroupOnlyWindowDef WITHIN-GROUP-only}
432     * {@link TWindowDef} attachment. Used both as a name-whitelist
433     * discriminator (so PG direct {@code fn.getWithinGroup()} cannot
434     * accidentally pass through, since PG sets {@code fn.getWindowDef()==
435     * null}) and as the {@link #isAggregateFunction} carve-out trigger.
436     */
437    private static boolean isHypotheticalSetWithinGroupCall(TFunctionCall fn) {
438        if (fn == null) return false;
439        if (!isWithinGroupOnlyWindowDef(fn.getWindowDef())) return false;
440        if (fn.getFunctionName() == null) return false;
441        String name = fn.getFunctionName().toString();
442        if (name == null || name.isEmpty()) return false;
443        return HYPOTHETICAL_SET_AGGREGATE_NAMES.contains(
444                name.toLowerCase(Locale.ROOT));
445    }
446
447    /**
448     * Predicate-bearing join types accepted by the current builder.
449     * Slice 64: each must carry either an ON condition or a USING
450     * clause; the per-key {@code joinColumnRefs} emission happens in
451     * {@link #buildRelations} for USING and via
     * {@link #collectColumnRefs} for ON. NATURAL joins are not part of
     * this tier; they are admitted through the separate
     * {@link #NATURAL_JOIN_TYPES} tier (slice 66). Semi/anti,
     * vendor-specific joins, and nested-join sources stay rejected so
     * the IR cannot quietly drop a row-set predicate. The unqualified
     * output-naming case for USING merged keys is deferred to S65.
456     */
457    private static final EnumSet<EJoinType> ALLOWED_PREDICATE_JOIN_TYPES = EnumSet.of(
458            EJoinType.inner,
459            EJoinType.left,
460            EJoinType.right,
461            EJoinType.full,
462            EJoinType.fullouter,
463            EJoinType.leftouter,
464            EJoinType.rightouter,
465            EJoinType.join
466    );
467
468    /**
469     * Slice 63 — join types admitted by the builder but that must NOT
470     * carry an ON or USING clause. Currently just {@code CROSS}; the
471     * tier exists so that future ON-less shapes can join the same path
472     * with the same shape contract. Slice 66 added a separate
473     * {@link #NATURAL_JOIN_TYPES} tier because NATURAL has its own
474     * catalog-required reject path that CROSS does not.
475     */
476    private static final EnumSet<EJoinType> ALLOWED_ON_LESS_JOIN_TYPES = EnumSet.of(
477            EJoinType.cross
478    );
479
480    /**
481     * Slice 66 — NATURAL join types. Each MUST NOT carry an ON or USING
482     * clause. Each MUST have resolvable catalog metadata on BOTH sides;
483     * a missing-catalog reject fires inside {@link #buildRelations}
484     * with a side-specific diagnostic. The shared-column list is
485     * inferred from the running {@link LeftOutputState} ∩ right's
486     * catalog and feeds into {@link #emitMergedJoinRefs} the same way
487     * a syntactically-declared USING list does.
488     */
489    private static final EnumSet<EJoinType> NATURAL_JOIN_TYPES = EnumSet.of(
490            EJoinType.natural,
491            EJoinType.natural_inner,
492            EJoinType.natural_left,
493            EJoinType.natural_right,
494            EJoinType.natural_leftouter,
495            EJoinType.natural_rightouter,
496            EJoinType.natural_full,
497            EJoinType.natural_fullouter
498    );
499
500    private static boolean isNaturalJoinType(EJoinType jt) {
501        return jt != null && NATURAL_JOIN_TYPES.contains(jt);
502    }
503
504    private SemanticIRBuilder() {}
505
506    public static SemanticProgram build(TSelectSqlStatement select, NameBindingProvider provider) {
507        if (select == null) {
508            throw new IllegalArgumentException("select must not be null");
509        }
510        if (provider == null) {
511            throw new IllegalArgumentException("provider must not be null");
512        }
513        List<StatementGraph> stmts = new ArrayList<>();
514        List<LineageEdge> lineage = new ArrayList<>();
515        Map<String, Integer> cteNameToStatementIndex = new HashMap<>();
516        Map<String, List<String>> ctePublishedColumns = new HashMap<>();
517
518        TCTEList cteList = select.getCteList();
519        boolean hasOuterCteList = cteList != null && cteList.size() > 0;
520
521        // Slice 108 Phase 0 — extract inline SELECT-side CTE walker into
522        // buildSelectCteList helper. Phase 0 is behaviour-preserving: the
523        // helper called with allowShadowOverride=false and
524        // additionalAllCteNames=null reproduces the prior inline walker.
525        // Phase 1 (shadow override) and Phase 3 (mixed outer+inner WITH on
526        // INSERT) reuse the same helper from buildInsert.
527        buildSelectCteList(cteList, provider, stmts, lineage,
528                cteNameToStatementIndex, ctePublishedColumns,
529                /*allowShadowOverride=*/ false,
530                /*additionalAllCteNames=*/ null);
531
532        // Slice 108 Phase 0 — extract outer-SELECT processing into
533        // buildSelectBodyAfterCteWalk helper. The hasOuterCteListAlreadyProcessed
534        // flag is passed explicitly so the buildInsert shadow path can null
535        // both AST CTE lists before calling and still claim "CTEs already
536        // walked" (round-2 codex BLOCKER 4 fix).
537        buildSelectBodyAfterCteWalk(select, provider, stmts, lineage,
538                cteNameToStatementIndex, ctePublishedColumns,
539                /*hasOuterCteListAlreadyProcessed=*/ hasOuterCteList);
540
541        return new SemanticProgram(stmts, lineage);
542    }
543
544    /**
545     * Slice 108 — walk a SELECT-side WITH clause and append each CTE body
546     * to {@code stmts} as a preceding statement. Extracted from the inline
547     * walker that previously lived in {@link #build} (lines ~516–663 pre-
548     * slice-108). Mirrors the slice-101 {@link #buildMergeCteList},
549     * slice-105 {@link #buildUpdateCteList}, and slice-106
550     * {@link #buildDeleteCteList} helpers.
551     *
552     * <p>Phase 0 (behaviour-preserving refactor): {@code allowShadowOverride
553     * = false} and {@code additionalAllCteNames = null} reproduce the
554     * pre-slice-108 inline walker byte-for-byte.
555     *
556     * <p>Phase 1 (shadow admit): {@code allowShadowOverride = true} enables
557     * the mixed outer+inner WITH on INSERT shadow case (slice 108). When
558     * called from {@link #buildInsert}, the OUTER pass runs with
559     * {@code allowShadowOverride=false}, populating
560     * {@code cteNameToStatementIndex} and {@code ctePublishedColumns} with
561     * outer CTE bindings. The INNER pass then runs with
562     * {@code allowShadowOverride=true} and
563     * {@code additionalAllCteNames=outerAllNames}. The inner pass:
564     * <ul>
565     *   <li>uses a fresh local {@code localVisibleSoFar} for intra-list
566     *       duplicate detection (so {@code DUPLICATE_CTE_NAME} still
567     *       fires for inner {@code x, x} even when outer also declares
568     *       {@code x});</li>
569     *   <li>snapshots {@code cteNameToStatementIndex.keySet()} at entry
570     *       into {@code outerKeysSnapshot}; the union
571     *       {@code outerKeysSnapshot ∪ localVisibleSoFar} drives BOTH
572     *       {@link #rejectForwardCteReferences} AND
573     *       {@link NameBindingProvider#withCteContext} (round-2 codex
574     *       BLOCKER 3 fix — keeps inner-y references to outer-x from
575     *       being falsely flagged as forward references);</li>
576     *   <li>on collision with an outer entry (after a successful body
577     *       build), {@link Map#put} overrides the
578     *       {@code cteNameToStatementIndex} and {@code ctePublishedColumns}
579     *       entries so the source SELECT sees the INNER body. The OUTER
580     *       body stays in {@code stmts[]} at its earlier position; its
581     *       cteMap entry is just no longer referenced by name (PG nested-
582     *       WITH inner-shadows-outer semantics).</li>
583     * </ul>
584     *
585     * <p>{@code additionalAllCteNames} is unioned into the per-call
586     * {@code allCteNames} that {@link #rejectForwardCteReferences} consults
587     * (round-1 codex BLOCKER 2 fix — keeps each scope's forward-ref check
588     * narrow so an outer CTE body referencing a base-table whose name
589     * happens to coincide with an inner CTE name does NOT falsely flag).
590     */
591    private static void buildSelectCteList(
592            TCTEList cteList,
593            NameBindingProvider provider,
594            List<StatementGraph> stmts,
595            List<LineageEdge> lineage,
596            Map<String, Integer> cteNameToStatementIndex,
597            Map<String, List<String>> ctePublishedColumns,
598            boolean allowShadowOverride,
599            Set<String> additionalAllCteNames) {
           // Parameter roles as used in this body:
           //   cteList                 - WITH list walked left-to-right; a null or
           //                             empty list makes the whole call a no-op.
           //   provider                - base provider; narrowed once per CTE via
           //                             withCteContext(effectiveVisible).
           //   stmts / lineage         - shared output lists; appended to as bodies
           //                             are built, truncated back on a failed
           //                             FROM-subquery extraction or body build.
           //   cteNameToStatementIndex - lower-cased CTE name -> index of its body;
           //                             put() AFTER each body is built, so an
           //                             existing key is overwritten.
           //   ctePublishedColumns     - lower-cased CTE name -> published column
           //                             names; put() once per CTE at loop end.
           //   allowShadowOverride     - when true, the map's key set at entry is
           //                             snapshotted and treated as already visible.
           //   additionalAllCteNames   - optional (may be null/empty) extra names
           //                             unioned into the forward-reference scope.
600        if (cteList == null || cteList.size() == 0) {
601            return;
602        }
           // Runs before any mutation of stmts/lineage/maps below.
603        rejectRecursiveCtes(cteList);
604
605        // Per-call allCteNames for rejectForwardCteReferences. The optional
606        // additionalAllCteNames extends this scope (Phase 1: outer names
607        // visible to inner CTE body forward-ref checks). Phase 0 path passes
608        // null, so this is just collectCteNames(cteList).
609        Set<String> allCteNames;
610        if (additionalAllCteNames != null && !additionalAllCteNames.isEmpty()) {
               // Copy before adding so the set returned by collectCteNames and
               // the caller's additionalAllCteNames are both left unmodified.
611            allCteNames = new HashSet<>(collectCteNames(cteList));
612            allCteNames.addAll(additionalAllCteNames);
613        } else {
614            allCteNames = collectCteNames(cteList);
615        }
616
617        // Phase 1: snapshot outer-scope CTE names at entry so subsequent
618        // iterations of this list always see the FULL outer scope for
619        // forward-ref classification and withCteContext, even if a shadow
620        // override later overwrites a name's cteMap entry.
           // null (not an empty set) when allowShadowOverride is false; the
           // loop below distinguishes the two cases.
621        Set<String> outerKeysSnapshot = allowShadowOverride
622                ? new HashSet<>(cteNameToStatementIndex.keySet())
623                : null;
624
625        // Build each CTE body left-to-right. Each CTE sees CTEs declared
626        // strictly before it (standard SQL chain semantics, slice 4).
627        // Slice 18: CTE bodies accept FROM-subqueries (mirroring the
628        // outer-SELECT extraction path) AND scalar-subquery projections
629        // (slice 11): for each CTE body, FROM-subqueries are extracted
630        // first, then scalar bodies, then the CTE body is built/appended.
631        // The per-CTE-body subqueryAliasToIndex is local to the iteration
632        // so different CTE bodies cannot collide on FROM-subquery aliases.
633        // Slice 60: running map of "CTE name → published column names"
634        // for star expansion. Each CTE's published columns are added
635        // AFTER its body is built so a CTE cannot self-reference and
636        // forward references (rejected earlier) cannot leak through.
637        // Set-op CTE bodies use the merged StatementGraph.outputColumns.
638        // For non-set-op CTE bodies the column names also come from
639        // StatementGraph.outputColumns. Explicit CTE column lists are
640        // rejected at the star expander, not at populate time.
641        Set<String> localVisibleSoFar = new HashSet<>();
642        for (int i = 0; i < cteList.size(); i++) {
643            TCTE cte = cteList.getCTE(i);
644            String cteName = cte.getTableName().toString();
               // All map/set keys in this method use the Locale.ROOT lower-cased
               // name; the original spelling is kept only for messages and for
               // the name passed to the body builders.
645            String cteNameLower = cteName.toLowerCase(Locale.ROOT);
646            // Slice 15 MUST 9 / round-4 MUST 1: reject duplicate CTE
647            // names BEFORE rejectForwardCteReferences so duplicate-name
648            // diagnostics are not preempted by forward-reference
649            // diagnostics. cteNameToStatementIndex is keyed lower-case;
650            // a duplicate entry would silently overwrite the earlier
651            // body and leave OUTER_REFERENCE-of-CTE pointing at the
652            // wrong statement.
653            //
654            // Slice 108: intra-list duplicate check uses localVisibleSoFar
655            // (NOT outerKeysSnapshot) so an inner CTE shadowing an outer
656            // CTE is admitted while inner-x, inner-x stays rejected (round-1
657            // codex BLOCKER 1 fix).
658            if (localVisibleSoFar.contains(cteNameLower)) {
659                throw new SemanticIRBuildException(
660                        Diagnostic.error(DiagnosticCode.DUPLICATE_CTE_NAME,
661                        "duplicate CTE name '" + cteName
662                                + "' in WITH clause; CTE names must be unique", cte));
663            }
664            // Slice 108: effectiveVisible = outerKeysSnapshot ∪ localVisibleSoFar.
665            // Drives BOTH rejectForwardCteReferences AND
666            // bodyProvider.withCteContext so inner-y body referencing outer-x
667            // is admitted (round-2 codex BLOCKER 3 fix).
               //
               // NOTE: when one side of the union is empty the other set is
               // reused as-is (no copy), so effectiveVisible may alias
               // localVisibleSoFar or outerKeysSnapshot. Within this method
               // localVisibleSoFar is only mutated at the very end of each
               // iteration and outerKeysSnapshot is never mutated.
               // NOTE(review): whether withCteContext retains the reference
               // beyond this iteration is not visible here — confirm.
668            Set<String> effectiveVisible;
669            if (outerKeysSnapshot != null) {
670                if (outerKeysSnapshot.isEmpty()) {
671                    effectiveVisible = localVisibleSoFar;
672                } else if (localVisibleSoFar.isEmpty()) {
673                    effectiveVisible = outerKeysSnapshot;
674                } else {
675                    effectiveVisible = new HashSet<>(outerKeysSnapshot);
676                    effectiveVisible.addAll(localVisibleSoFar);
677                }
678            } else {
679                effectiveVisible = localVisibleSoFar;
680            }
681            rejectForwardCteReferences(cte, allCteNames, effectiveVisible);
682            // Slice 60: bodyProvider gets the CTE-context narrowing
683            // first; the effective-alias-keyed in-scope map is
684            // applied LATER, after the body's own FROM-subqueries
685            // are extracted (so we can walk the body's FROM clause
686            // and resolve each relation to its effective alias).
687            // This deferred narrowing replaces the slice-60 v1 path
688            // that put the running ctePublishedColumns map (CTE-name
689            // keyed) directly on the provider — that keying class
690            // could collide when a subquery alias matched a CTE
691            // name (codex diff-review).
692            NameBindingProvider bodyProvider = provider.withCteContext(effectiveVisible);
693            TSelectSqlStatement cteBody = cte.getSubquery();
694            // Slice 103 — snapshot lineage size BEFORE the body branch so
695            // the slice-102 rename helper can rewrite outgoing
696            // STATEMENT_OUTPUT refs in [lineageSize0, lineage.size())
697            // without touching prior CTE bodies' edges. Covers BOTH the
698            // set-op and non-set-op branches (mirrors slice-102
699            // buildMergeCteList at line ~5820).
700            int lineageSize0 = lineage.size();
               // Assigned by exactly one of the two branches below; it is the
               // value stored in cteNameToStatementIndex for this CTE.
701            int bodyIdx;
702            if (cteBody != null
703                    && cteBody.getSetOperatorType() != null
704                    && cteBody.getSetOperatorType() != ESetOperatorType.none) {
705                // Slice 12: set-op CTE body. The outer set-op statement
706                // carries the CTE name so BodyIndexes.cteByConsumerAndName
707                // resolves it (slice-18 consumer-keyed projector lookup).
708                // The CTE body's CTE list (if any) is rejected as a
709                // nested-WITH inside buildSetOpProgram.
                   //
                   // Unlike the non-set-op branch, this call site has no local
                   // rollback wrapper and no emitLineageForStatement call; the
                   // slice-114 comment further down states that
                   // buildSetOpProgram carries its own set-op-wide rollback.
710                bodyIdx = buildSetOpProgram(cteBody, bodyProvider, stmts, lineage,
711                        cteNameToStatementIndex, cteName,
712                        /*hasOuterCteListAlreadyProcessed=*/ false);
713                cteNameToStatementIndex.put(cteNameLower, bodyIdx);
714            } else {
715                // Slice 18: snapshot/rollback around recursive
716                // FROM-subquery extraction inside this CTE body.
717                // Mirrors the outer-SELECT wrapper below and the
718                // slice-16 set-op wrapper. Currently defensive: a
719                // thrown exception in a deeper level would otherwise
720                // leak siblings/ancestors at this CTE's level into
721                // stmts/lineage. The wrapper truncates back to the
722                // pre-extraction boundary and rethrows. Per-CTE
723                // granularity: earlier CTE bodies in the same WITH
724                // list are NOT rolled back (they're already complete).
725                int cteStmtsSize0 = stmts.size();
726                int cteLineageSize0 = lineage.size();
727                Map<String, Integer> cteSubqueryAliasToIndex;
728                try {
729                    // Slice 60: pass the running ctePublishedColumns
730                    // so the body's own FROM-subqueries see earlier
731                    // CTEs at every recursion level.
732                    cteSubqueryAliasToIndex =
733                            extractFromSubqueriesAsStatements(cteBody, bodyProvider,
734                                    stmts, lineage, cteNameToStatementIndex,
735                                    ctePublishedColumns);
736                } catch (RuntimeException ex) {
                       // Pop from the tail until both lists are back at their
                       // pre-extraction sizes, then rethrow the same exception.
737                    while (stmts.size() > cteStmtsSize0) stmts.remove(stmts.size() - 1);
738                    while (lineage.size() > cteLineageSize0) lineage.remove(lineage.size() - 1);
739                    throw ex;
740                }
741                EnclosingScope cteEnclosing = buildEnclosingScope(cteBody,
742                        cteNameToStatementIndex, cteSubqueryAliasToIndex,
743                        /*parent=*/ null);
                   // NOTE(review): no local try/catch around this extraction; the
                   // slice-114 comment below says it has its own slice-17/18
                   // rollback wrapper — confirm in the callee.
744                Map<Integer, ScalarInfo> cteScalarMap =
745                        extractScalarSubqueriesAsStatements(cteBody,
746                                bodyProvider, stmts, lineage,
747                                cteNameToStatementIndex, cteEnclosing,
748                                /*allowRecursiveScalarSubqueryExtraction=*/ true);
749                // Slice 60 (codex diff-review): build the per-CTE
750                // effective-alias-keyed in-scope map by walking the
751                // CTE body's FROM list. CTE references and
752                // FROM-subquery aliases live in the same FROM
753                // namespace (preflight rejects duplicates), so
754                // effective-alias keying makes a name collision
755                // physically impossible.
756                Map<String, List<String>> cteBodyInScope =
757                        buildEffectiveAliasInScopeMap(cteBody, bodyProvider,
758                                ctePublishedColumns, cteSubqueryAliasToIndex,
759                                stmts);
760                NameBindingProvider cteBodyProviderWithStar = bodyProvider
761                        .withInScopeRelationColumns(cteBodyInScope);
762                // Slice 114 — switch from the 7-arg buildSelectStatement
763                // to the 14-arg buildSelectStatementImpl so the CTE
764                // body's WHERE clause can extract uncorrelated predicate
765                // subqueries (IN-SELECT / EXISTS / NOT EXISTS / scalar
766                // comparison / ANY-ALL-SOME) as their own statements.
767                // The wrapper mirrors the outer-SELECT entry pattern in
768                // {@link #build}: if the build appends predicate bodies
769                // and then a later post-extraction reject fires, the
770                // try/catch truncates stmts/lineage back to the
771                // pre-call boundary so a partial extraction doesn't
772                // leak into the program. The slice-113 set-op branch
773                // call site is itself enclosed by the slice-16
774                // SET-OP-WIDE rollback at {@link #buildSetOpProgram};
775                // the CTE-body call sites do NOT inherit a similar
776                // enclosing wrapper, which is why slice 114 adds one
777                // here. The from-subquery / scalar-subquery
778                // extractions above this point have their own
779                // slice-17/18 wrappers, so the pre-CALL snapshot
780                // bounds the truncate exactly to whatever
781                // buildSelectStatementImpl appended.
782                int cteBodyStmtsSnapshot = stmts.size();
783                int cteBodyLineageSnapshot = lineage.size();
784                StatementGraph body;
785                try {
786                    body = buildSelectStatementImpl(cteBody,
787                            cteBodyProviderWithStar, cteName,
788                            /*hasOuterCteListAlreadyProcessed=*/ false,
789                            /*allowFromSubqueries=*/ true,
790                            /*allowScalarProjectionSubqueries=*/ true,
791                            /*allowWindowProjection=*/ true,
792                            // Slice 114 — keep JOIN-ON predicate
793                            // subqueries rejected inside CTE bodies
794                            // (preserve slice 23/26 contract; the lift
795                            // is WHERE-only; the two flags are
796                            // independent per slice 113 split).
797                            /*allowJoinOnPredicateSubqueries=*/ false,
798                            /*stmtsForExtraction=*/ stmts,
799                            /*lineageForExtraction=*/ lineage,
800                            /*cteMapForExtraction=*/ cteNameToStatementIndex,
801                            /*isPredicateBody=*/ false,
802                            /*whereClauseContext=*/ PredicateClauseContext.CTE_BODY_WHERE,
803                            /*allowWherePredicateSubqueries=*/ true);
804                } catch (RuntimeException ex) {
805                    while (stmts.size() > cteBodyStmtsSnapshot) stmts.remove(stmts.size() - 1);
806                    while (lineage.size() > cteBodyLineageSnapshot) lineage.remove(lineage.size() - 1);
807                    throw ex;
808                }
                   // The body is appended only after a successful build, so
                   // bodyIdx is exactly its position in stmts.
809                bodyIdx = stmts.size();
810                stmts.add(body);
811                // Slice 108 — emit lineage BEFORE the cteMap.put so that
812                // in the shadow case (allowShadowOverride=true with
813                // cteNameLower already in cteMap from outer pass), the
814                // body's column refs to <cteNameLower> still resolve to
815                // the OUTER body (PG inner-x body sees outer-x via the
816                // closer-enclosing-not-yet-shadowed fallback). Non-shadow
817                // cases are unaffected because cteMap does not yet contain
818                // cteNameLower at this point and the body cannot reference
819                // its own name without going through the recursive-CTE
820                // path (already rejected upstream).
821                emitLineageForStatement(body, bodyIdx, lineage,
822                        cteNameToStatementIndex, cteSubqueryAliasToIndex,
823                        cteScalarMap);
824                cteNameToStatementIndex.put(cteNameLower, bodyIdx);
825            }
826            // Slice 103 — apply the slice-102 rename helper if the CTE
827            // declares an explicit column list (no-op otherwise). The
828            // helper returns the published column list (renamed if
829            // explicit, else body's inner names). Slice-60's
830            // `ctePublishedColumns.put` is collapsed into this single
831            // call site (covers both branches above).
832            List<String> publishedCols = applyExplicitCteColumnListRename(
833                    cte, stmts, lineage, bodyIdx, lineageSize0, "SELECT");
834            ctePublishedColumns.put(cteNameLower, publishedCols);
               // Marked visible last: while this CTE's own body was processed
               // its name was not yet in localVisibleSoFar, and later CTEs in
               // the list see it from the next iteration on.
835            localVisibleSoFar.add(cteNameLower);
836        }
837    }
838
839    /**
840     * Slice 108 — outer-SELECT processing extracted from the previous inline
841     * body of {@link #build} (lines ~665–763 pre-slice-108).
        *
        * <p>Builds the statement(s) for {@code select} itself, assuming the
        * caller has already walked any WITH list and populated
        * {@code cteNameToStatementIndex} / {@code ctePublishedColumns}. A
        * set-operator SELECT is delegated wholesale to
        * {@link #buildSetOpProgram}. Otherwise FROM-clause subqueries and then
        * scalar subqueries are extracted as their own statements, the outer
        * statement is built and appended to {@code stmts}, and its lineage is
        * emitted last.
842     *
843     * <p>{@code hasOuterCteListAlreadyProcessed} is an EXPLICIT boolean
844     * parameter (round-2 codex BLOCKER 4 fix). Previously this was inferred
845     * from {@code select.getCteList() != null && size > 0}; after the
846     * slice-108 buildInsert shadow path nulls {@code source.getCteList()}
847     * before calling, that inference would be wrong. The caller passes the
848     * truth.
849     *
850     * <p>The post-walk {@code cteNameToStatementIndex.keySet()} replaces the
851     * pre-walk {@code allCteNames} because the walker has populated every
852     * declared CTE name by lowercase key — they are equal sets.
        *
        * @param select the SELECT whose body is built; its set-operator type
        *        decides between the set-op path and the plain-SELECT path
        * @param provider base name-binding provider; narrowed in this method
        *        via {@code withCteContext} using the map's current key set
        * @param stmts statement list appended to; truncated back to its prior
        *        size if FROM-subquery extraction or the outer build throws
        * @param lineage lineage-edge list appended to; truncated likewise
        * @param cteNameToStatementIndex CTE name → statement index map produced
        *        by the earlier CTE walk; read here and passed to the helpers
        * @param ctePublishedColumns CTE name → published column names; passed
        *        to FROM-subquery extraction and in-scope map construction
        * @param hasOuterCteListAlreadyProcessed forwarded unchanged to
        *        {@code buildSetOpProgram} / {@code buildSelectStatementImpl}
853     */
854    private static void buildSelectBodyAfterCteWalk(
855            TSelectSqlStatement select,
856            NameBindingProvider provider,
857            List<StatementGraph> stmts,
858            List<LineageEdge> lineage,
859            Map<String, Integer> cteNameToStatementIndex,
860            Map<String, List<String>> ctePublishedColumns,
861            boolean hasOuterCteListAlreadyProcessed) {
           // Copy of the map's keys (or the shared empty set), not a live view
           // of the map.
862        Set<String> allCteNames = cteNameToStatementIndex.isEmpty()
863                ? Collections.<String>emptySet()
864                : new HashSet<>(cteNameToStatementIndex.keySet());
865
866        // Slice 12: top-level set-op dispatch. CTE list (if any) was
867        // already processed by the caller; forward the caller-supplied
868        // hasOuterCteListAlreadyProcessed flag (it is not hard-coded to
           // true here) so buildSetOpProgram doesn't re-flag an
           // already-walked WITH list as a nested WITH.
869        if (select.getSetOperatorType() != null
870                && select.getSetOperatorType() != ESetOperatorType.none) {
871            NameBindingProvider outerProvider = provider.withCteContext(allCteNames);
               // The returned statement index is not needed at top level.
872            buildSetOpProgram(select, outerProvider, stmts, lineage,
873                    cteNameToStatementIndex, /*setOpName=*/ null,
874                    /*hasOuterCteListAlreadyProcessed=*/ hasOuterCteListAlreadyProcessed);
875            return;
876        }
877
878        // Outer statement: pre-extract any FROM-clause subqueries as their
879        // own statements, then any scalar-subquery projections, then build
880        // the outer body, then emit lineage with the global CTE map, the
881        // outer-local subquery alias map, AND the scalar-projection map.
882        // Slice 60: outerProvider gets the CTE-context narrowing here;
883        // the effective-alias-keyed in-scope map is applied LATER, after
884        // outer FROM-subqueries are extracted. The same deferred
885        // narrowing pattern as the CTE-body branch — see the codex
886        // diff-review note on alias/CTE-name collision.
887        NameBindingProvider outerProvider = provider.withCteContext(allCteNames);
888        // Slice 17: snapshot/rollback around recursive FROM-subquery
889        // extraction. The recursive extractor mutates stmts/lineage as
890        // each level's bodies land; if a deeper-level rejection fires
891        // after sibling/ancestor mutations, this wrapper truncates the
892        // lists back to the pre-call boundary and rethrows. Mirrors the
893        // slice-16 buildSetOpProgram wrapper (§14.18 process lesson #21:
894        // when a class of mutation-free checks can fire after partial
895        // mutation, close it transactionally instead of point-fixing).
896        //
897        // The rollback is currently defensive: build() allocates fresh
898        // stmts/lineage per invocation, so a thrown exception's caller
899        // cannot directly observe leaked state. The wrapper is kept
900        // because (a) the slice-17 preflight closes the most direct
901        // partial-mutation classes BEFORE the recursive extraction
902        // runs, but recursive levels can still fail at deeper rejection
903        // points (e.g. a nested set-op-in-FROM-subquery body inside a
904        // sibling that succeeds at the preflight); (b) consistency with
905        // slice 16's pattern means a future refactor that lifts the
906        // build() per-call list allocation does not silently re-open
907        // the partial-mutation class.
908        int stmtsSize0 = stmts.size();
909        int lineageSize0 = lineage.size();
910        Map<String, Integer> outerSubqueryAliasToIndex;
911        try {
912            outerSubqueryAliasToIndex =
913                    extractFromSubqueriesAsStatements(select, outerProvider,
914                            stmts, lineage, cteNameToStatementIndex,
915                            ctePublishedColumns);
916        } catch (RuntimeException ex) {
               // Pop from the tail until both lists are back at their
               // pre-extraction sizes, then rethrow the same exception.
917            while (stmts.size() > stmtsSize0) stmts.remove(stmts.size() - 1);
918            while (lineage.size() > lineageSize0) lineage.remove(lineage.size() - 1);
919            throw ex;
920        }
921        EnclosingScope outerEnclosing = buildEnclosingScope(select,
922                cteNameToStatementIndex, outerSubqueryAliasToIndex,
923                /*parent=*/ null);
           // NOTE(review): unlike the FROM-subquery extraction above and the
           // outer build below, this call has no local rollback wrapper —
           // confirm the callee restores stmts/lineage on failure.
924        Map<Integer, ScalarInfo> outerScalarMap =
925                extractScalarSubqueriesAsStatements(select, outerProvider,
926                        stmts, lineage, cteNameToStatementIndex, outerEnclosing,
927                        /*allowRecursiveScalarSubqueryExtraction=*/ true);
928        // Slice 60 (codex diff-review): build the outer's
929        // effective-alias-keyed in-scope map by walking the outer
930        // SELECT's FROM list. Effective-alias keying eliminates the
931        // CTE-name vs subquery-alias collision class.
932        Map<String, List<String>> outerInScope = buildEffectiveAliasInScopeMap(
933                select, outerProvider, ctePublishedColumns,
934                outerSubqueryAliasToIndex, stmts);
935        NameBindingProvider outerProviderWithStar = outerProvider
936                .withInScopeRelationColumns(outerInScope);
937        // Slice 23: outer-SELECT path uses buildSelectStatementImpl directly so
938        // the slice-23 EXISTS-extraction can append predicate-body statements
939        // to `stmts`/`lineage`. Snapshot/rollback wrapper around the call
940        // matches the slice-16/17/20 pattern: a partial extraction (e.g. third
941        // EXISTS rejected after first two extracted) truncates the lists.
942        int outerStmtsSnapshot = stmts.size();
943        int outerLineageSnapshot = lineage.size();
944        StatementGraph outer;
945        try {
               // The outer statement is built with a null name (third argument).
946            outer = buildSelectStatementImpl(select, outerProviderWithStar, null,
947                    /*hasOuterCteListAlreadyProcessed=*/ hasOuterCteListAlreadyProcessed,
948                    /*allowFromSubqueries=*/ true,
949                    /*allowScalarProjectionSubqueries=*/ true,
950                    /*allowWindowProjection=*/ true,
951                    /*allowJoinOnPredicateSubqueries=*/ true,
952                    stmts, lineage,
953                    /*cteMapForExtraction=*/ cteNameToStatementIndex,
954                    /*isPredicateBody=*/ false,
955                    /*whereClauseContext=*/ PredicateClauseContext.SELECT_WHERE,
956                    /*allowWherePredicateSubqueries=*/ true);
957        } catch (RuntimeException e) {
958            while (stmts.size() > outerStmtsSnapshot) stmts.remove(stmts.size() - 1);
959            while (lineage.size() > outerLineageSnapshot) lineage.remove(lineage.size() - 1);
960            throw e;
961        }
           // Appended only after a successful build, so outerIndex is the outer
           // statement's position in stmts; lineage is emitted against it.
962        int outerIndex = stmts.size();
963        stmts.add(outer);
964        emitLineageForStatement(outer, outerIndex, lineage,
965                cteNameToStatementIndex, outerSubqueryAliasToIndex, outerScalarMap);
966    }
967
968    /**
969     * Slice 78 — admit a single {@code INSERT INTO target SELECT ...}
970     * statement. Builds the source SELECT via {@link #build} (reusing
971     * the existing pipeline unchanged), then appends an {@code "INSERT"}-
972     * kind {@link StatementGraph} carrying the target relation and
973     * cross-statement lineage edges.
974     *
975     * <p>Admitted shape: {@code INSERT INTO <target> [(c1, c2, ...)]
976     * <subquery-SELECT>}. Rejections:
977     * <ul>
978     *   <li>{@link EInsertSource#values}, {@code values_empty},
979     *       {@code default_values}, {@code execute},
980     *       {@code values_function}, {@code values_multi_table},
981     *       {@code hive_query}, {@code values_oracle_record},
982     *       {@code set_column_value}, {@code value_table} →
983     *       {@link DiagnosticCode#INSERT_SOURCE_NOT_SUPPORTED}.</li>
984     *   <li>Oracle {@code INSERT ALL} / {@code INSERT FIRST} →
985     *       {@link DiagnosticCode#INSERT_MULTI_TABLE_NOT_SUPPORTED}.
986     *       Hive multi-insert ({@code multiInsertStatements} non-empty) is
987     *       routed to {@link #buildHiveMultiInsert} instead of rejected.</li>
988     *   <li>Missing target table (defensive — the parser usually rejects
989     *       first) → {@link DiagnosticCode#INSERT_TARGET_MISSING}.</li>
990     *   <li>Explicit column list arity ≠ source SELECT output count →
991     *       {@link DiagnosticCode#INSERT_COLUMN_COUNT_MISMATCH}.</li>
992     * </ul>
993     *
994     * <p>The source SELECT is built first via {@code build()} and its
995     * full {@link SemanticProgram} (CTE bodies + scalar bodies +
996     * FROM-subquery bodies + outer SELECT + cross-stmt lineage) is
997     * appended verbatim to the returned program. The INSERT
998     * {@link StatementGraph} is appended LAST; its
999     * {@link StatementGraph#getRelations() relations} lists the source
1000     * SELECT as a single {@link RelationKind#SUBQUERY} entry whose
1001     * {@code qualifiedName} is the source SELECT's outer-statement name
1002     * (synthesised when needed). All other column-ref lists stay empty
1003     * on the INSERT — an INSERT has no projection of its own.
1004     *
1005     * <p>Cross-statement {@link LineageEdge}s for the INSERT are
1006     * {@code from = TABLE_COLUMN(target_qname, target_col_i_name)}
1007     * and {@code to = STATEMENT_OUTPUT(selectIdx, source_output_i_name)}.
1008     * Target column names are the explicit INSERT column-list spellings
1009     * when supplied, else the source SELECT's positional output names.
1010     */
1011    public static SemanticProgram buildInsert(TInsertSqlStatement insert,
1012                                              NameBindingProvider provider) {
1013        if (insert == null) {
1014            throw new IllegalArgumentException("insert must not be null");
1015        }
1016        if (provider == null) {
1017            throw new IllegalArgumentException("provider must not be null");
1018        }
1019
1020        // Oracle INSERT ALL / FIRST rejects: their multi-value AST shape
1021        // is fundamentally different from the Hive multi-insert path.
1022        // Slice 78 scopes single-target INSERT SELECT; slice 93 lifts
1023        // the Hive multi-insert case via buildHiveMultiInsert.
1024        if (insert.isInsertAll() || insert.isInsertFirst()) {
1025            throw new SemanticIRBuildException(Diagnostic.error(
1026                    DiagnosticCode.INSERT_MULTI_TABLE_NOT_SUPPORTED,
1027                    "multi-table INSERT (INSERT ALL / INSERT FIRST) is not "
1028                            + "supported by SemanticIRBuilder.buildInsert; "
1029                            + "slice 78 admits single-target INSERT INTO <target> SELECT ...",
1030                    insert));
1031        }
1032        // Slice 109 — outer-WITH on Hive multi-insert
1033        // (`WITH x AS (...) FROM x INSERT INTO t1 SELECT ... INSERT INTO t2
1034        // SELECT ...`) is now admitted via buildHiveMultiInsert's CTE-aware
1035        // path. The slice-104 early reject for this shape is removed; the
1036        // helper builds the outer CTE bodies ONCE upfront and reuses the
1037        // shared cteMap/publishedMap across every sub-SELECT.
1038        // INSERT_OUTER_WITH_ON_HIVE_MULTI_INSERT_NOT_SUPPORTED stays declared-
1039        // but-unreached for API stability (slice 71/72/82/86/95/96/97/98/108
1040        // retain-for-documentation precedent).
1041        //
1042        // Hive multi-insert: FROM src INSERT INTO t1 SELECT ... INSERT INTO t2 SELECT ...
1043        // Each sub-SELECT already carries the shared FROM source in its fromClause.
1044        if (!insert.getMultiInsertStatements().isEmpty()) {
1045            return buildHiveMultiInsert(insert, provider);
1046        }
1047
1048        EInsertSource src = insert.getInsertSource();
1049        if (src != EInsertSource.subquery) {
1050            throw new SemanticIRBuildException(Diagnostic.error(
1051                    DiagnosticCode.INSERT_SOURCE_NOT_SUPPORTED,
1052                    "INSERT source '" + src + "' is not supported by "
1053                            + "SemanticIRBuilder.buildInsert; slice 78 admits "
1054                            + "subquery-source INSERT only (INSERT INTO <target> SELECT ...)",
1055                    insert));
1056        }
1057
1058        // Slice 85 — cheap statement-level OUTPUT_INTO reject runs
1059        // BEFORE the source SELECT is built so a multi-violation
1060        // shape (e.g. `INSERT INTO t OUTPUT INSERTED.x INTO #log
1061        // SELECT ... FROM bad_join`) routes to the cheaper structural
1062        // code first.
1063        if (insert.getOutputClause() != null
1064                && insert.getOutputClause().getIntoTable() != null) {
1065            throw new SemanticIRBuildException(Diagnostic.error(
1066                    DiagnosticCode.OUTPUT_INTO_NOT_SUPPORTED,
1067                    "INSERT OUTPUT ... INTO <target> writes a second target; "
1068                            + "slice 85 admits projection-only OUTPUT",
1069                    insert));
1070        }
1071
1072        TTable targetTable = insert.getTargetTable();
1073        if (targetTable == null || targetTable.getTableName() == null) {
1074            throw new SemanticIRBuildException(Diagnostic.error(
1075                    DiagnosticCode.INSERT_TARGET_MISSING,
1076                    "INSERT statement has no resolvable target table",
1077                    insert));
1078        }
1079        String targetQName = targetTable.getTableName().toString();
1080        if (targetQName.isEmpty()) {
1081            throw new SemanticIRBuildException(Diagnostic.error(
1082                    DiagnosticCode.INSERT_TARGET_MISSING,
1083                    "INSERT target table name is empty",
1084                    insert));
1085        }
1086
1087        TSelectSqlStatement source = insert.getSubQuery();
1088        if (source == null) {
1089            // Defensive: getInsertSource() == subquery but subQuery is
1090            // null. Reported as INSERT_SOURCE_NOT_SUPPORTED (not INSERT_TARGET_MISSING).
1091            throw new SemanticIRBuildException(Diagnostic.error(
1092                    DiagnosticCode.INSERT_SOURCE_NOT_SUPPORTED,
1093                    "INSERT source is declared as subquery but no SELECT "
1094                            + "statement was attached",
1095                    insert));
1096        }
1097
1098        // Slice 104 — outer-WITH on INSERT. The parser attaches the outer
1099        // WITH clause to insert.getCteList(), NOT to source.getCteList().
1100        // Before slice 104 buildInsert ignored insert.getCteList(), which
1101        // silently mis-bound CTE references in the source SELECT as
1102        // TABLE-kind relations with phantom columns. The slice-104 fix is
1103        // an AST handoff: move insert.getCteList() onto source.getCteList()
1104        // for the duration of the inner build(source) call so the
1105        // slice-103 SELECT-side CTE walker handles construction, rename,
1106        // and rejects (recursive / duplicate / forward-reference / arity
1107        // mismatch). Restore in finally so the AST is observably
1108        // unchanged to the caller (Java field references — token-chain
1109        // state is perturbed by setCteList(null)'s removeTokens() but
1110        // observably benign for downstream Semantic IR).
1111        //
1112        // Slice 107 / 108 — mixed outer-WITH + inner-WITH on INSERT. PG /
1113        // Oracle / Snowflake admit at parse. Three sub-cases:
1114        //   (a) Only outer WITH populated. AST handoff (slice 104): move
1115        //       insert.cteList onto source.cteList and call build(source).
1116        //   (b) Only inner WITH populated. Pass through unchanged (the
1117        //       walker handles it on its own).
1118        //   (c) Both outer and inner WITH populated. Slice 107 admitted
1119        //       this for disjoint names via a flat-merge; slice 108 admits
1120        //       it for the SHADOWING case too (`WITH x ... INSERT ... WITH
1121        //       x ... SELECT ... FROM x` — inner shadows outer per
1122        //       PG/Oracle/Snowflake nested-WITH semantics). The slice-108
1123        //       implementation uses a TWO-PASS walker invocation in this
1124        //       method: outer pass first (allowShadowOverride=false), then
1125        //       inner pass (allowShadowOverride=true,
1126        //       additionalAllCteNames=outer-names). The walker's two-set
1127        //       visibility model (outerKeysSnapshot ∪ localVisibleSoFar)
1128        //       keeps PG semantics correct: inner-x's body sees outer-x via
1129        //       the cteMap (override is post-build), and inner CTEs declared
1130        //       after inner-x see inner-x. The OUTER body stays in stmts[]
1131        //       at its position; its cteMap entry is just no longer
1132        //       referenced by name (shadowed). Source SELECT's `FROM x`
1133        //       resolves to inner-x.
1134        //
1135        // INSERT_MIXED_OUTER_AND_INNER_WITH_NOT_SUPPORTED stays declared but
1136        // is no longer reached by slice 108. Slice107Test §F/§Q (cross-
1137        // boundary duplicate rejects) are deleted; positive coverage moves
1138        // to Slice108Test.
1139        TCTEList outerCtes = insert.getCteList();
1140        TCTEList savedSourceCtes = source.getCteList();
1141        boolean handoffApplied = false;
1142        SemanticProgram inner;
1143        boolean haveOuterCtes = outerCtes != null && outerCtes.size() > 0;
1144        boolean haveInnerCtes = savedSourceCtes != null && savedSourceCtes.size() > 0;
1145        if (haveOuterCtes && haveInnerCtes) {
1146            // Slice 108 — two-pass walker. Null both AST CTE lists before
1147            // calling buildSelectBodyAfterCteWalk so the helper does not
1148            // re-process source.getCteList(). hasOuterCteListAlreadyProcessed
1149            // is passed true (round-2 codex BLOCKER 4 fix).
1150            source.setCteList(null);
1151            insert.setCteList(null);
1152            handoffApplied = true;
1153            try {
1154                List<StatementGraph> innerStmts = new ArrayList<>();
1155                List<LineageEdge> innerLineage = new ArrayList<>();
1156                Map<String, Integer> cteMap = new HashMap<>();
1157                Map<String, List<String>> publishedMap = new HashMap<>();
1158                // Outer pass: outerAllNames as its own scope.
1159                buildSelectCteList(outerCtes, provider, innerStmts, innerLineage,
1160                        cteMap, publishedMap,
1161                        /*allowShadowOverride=*/ false,
1162                        /*additionalAllCteNames=*/ null);
1163                // Inner pass: outerAllNames also visible for forward-ref
1164                // classification (round-1 codex BLOCKER 2 fix); shadow
1165                // override admits cross-boundary duplicate names.
1166                Set<String> outerAllNames = collectCteNames(outerCtes);
1167                buildSelectCteList(savedSourceCtes, provider, innerStmts, innerLineage,
1168                        cteMap, publishedMap,
1169                        /*allowShadowOverride=*/ true,
1170                        /*additionalAllCteNames=*/ outerAllNames);
1171                // Source SELECT body sees the post-pass cteMap (inner wins
1172                // for shadowed names).
1173                buildSelectBodyAfterCteWalk(source, provider, innerStmts, innerLineage,
1174                        cteMap, publishedMap,
1175                        /*hasOuterCteListAlreadyProcessed=*/ true);
1176                inner = new SemanticProgram(innerStmts, innerLineage);
1177            } finally {
1178                source.setCteList(savedSourceCtes);
1179                insert.setCteList(outerCtes);
1180            }
1181        } else {
1182            // Single-sided cases. Slice 104 AST handoff for outer-only;
1183            // pass-through for inner-only or no CTEs.
1184            if (haveOuterCtes) {
1185                source.setCteList(outerCtes);
1186                insert.setCteList(null);
1187                handoffApplied = true;
1188            }
1189            try {
1190                inner = build(source, provider);
1191            } finally {
1192                if (handoffApplied) {
1193                    source.setCteList(savedSourceCtes);
1194                    insert.setCteList(outerCtes);
1195                }
1196            }
1197        }
1198
1199        // Slice 93 — delegate INSERT-graph assembly to the shared helper
1200        // used by both single-target (slice 78) and Hive multi-insert
1201        // (slice 93). out is freshly empty so the helper's rebase offset
1202        // is 0 (no-op for inner lineage). RETURNING/OUTPUT clauses are
1203        // passed directly (slice 85 still owns the projection build).
1204        List<StatementGraph> out = new ArrayList<>(inner.getStatements().size() + 1);
1205        List<LineageEdge> outLineage = new ArrayList<>();
1206        assembleInsertGraphAndLineage(
1207                insert, targetTable, targetQName, inner,
1208                "INSERT",
1209                insert.getReturningClause(),
1210                insert.getOutputClause(),
1211                out, outLineage, provider);
1212        return new SemanticProgram(out, outLineage);
1213    }
1214
1215    /**
1216     * Slice 93 — admit a Hive multi-insert block of the form
1217     * {@code FROM src INSERT INTO t1 SELECT col1 INSERT INTO t2 SELECT col2}.
1218     *
1219     * <p>The parser represents the whole block as one {@link TInsertSqlStatement}
1220     * whose first INSERT-SELECT pair is the primary statement and whose
1221     * additional pairs are in {@link TInsertSqlStatement#getMultiInsertStatements()}.
1222     * Crucially, each sub-SELECT already carries the shared FROM source in its own
1223     * {@code fromClause} / {@code fromSourceTable} — no post-processing is needed.
1224     *
1225     * <p>Produces a flat {@link SemanticProgram} containing per-pair blocks of
1226     * statements concatenated in INSERT order: each block contributes its source
1227     * SELECT's inner statements (CTE bodies / FROM-subquery bodies extracted by
1228     * {@link #build}) followed by its outer SELECT followed by an INSERT graph.
1229     * The minimum is {@code 2N} statements (one SELECT + one INSERT per target);
1230     * sub-SELECTs with extracted inner programs produce more. Each INSERT carries
1231     * cross-statement lineage edges pointing at its preceding SELECT via
1232     * {@link LineageRef#statementOutput}; per-pair inner lineage edges are
1233     * rebased by the current {@code out.size()} so absolute statement indices
1234     * remain valid across the concatenated program.
1235     *
1236     * <p>Safety note on the source-table fallback: this method enables
1237     * {@code provider.withSourceTableFallback(true)} so secondary sub-SELECTs
1238     * (which Resolver2 does not traverse) can still bind their column refs.
1239     * The fallback is constrained at the provider level to fire only when
1240     * Phase 2 did not run AND any explicit qualifier matches Phase 1's source —
1241     * see {@link Resolver2NameBindingProvider#bindColumn}. Current Hive
1242     * multi-insert parses always present a single FROM source, so Phase 1's
1243     * unqualified-column resolution is unambiguous in practice.
1244     */
1245    private static SemanticProgram buildHiveMultiInsert(TInsertSqlStatement insert,
1246                                                        NameBindingProvider provider) {
1247        // Slice 93 — source-table fallback strategy for Hive multi-insert.
1248        //
1249        // TSQLResolver2 does NOT process the secondary inserts in
1250        // getMultiInsertStatements(): their column refs have
1251        // resolution == null (Phase 2 did not run) even though Phase 1's
1252        // linkColumnToTable sets sourceTable. To let collectColumnRefs
1253        // accept these bindings, we enable a narrow source-table fallback
1254        // in the provider — but ONLY when every sub-SELECT has a SINGLE
1255        // FROM source (the common Hive multi-insert shape that current
1256        // parser support admits). In single-source contexts, Phase 1's
1257        // unqualified-column resolution is unambiguous; the fallback is
1258        // safe (round-2 codex Q1 BLOCKING).
1259        //
1260        // If any sub-SELECT has multiple FROM sources, Phase 1 may have
1261        // heuristically picked one source for an unqualified column —
1262        // promoting that to EXACT_MATCH could silently mis-bind. In that
1263        // case the fallback stays disabled; users must qualify column
1264        // references in the secondary branch (the qualifier-matches-source
1265        // safety in bindColumn still allows qualified refs through).
1266        boolean singleSource = isSingleSourceMultiInsert(insert);
1267        NameBindingProvider effectiveProvider = singleSource
1268                ? provider.withSourceTableFallback(true)
1269                : provider;
1270
1271        List<StatementGraph> out = new ArrayList<>();
1272        List<LineageEdge> outLineage = new ArrayList<>();
1273
1274        // Slice 109 — outer WITH on multi-insert: build the CTE bodies ONCE
1275        // upfront so each sub-SELECT's `FROM x` resolves against the shared
1276        // cteMap/publishedMap. The parser attaches the outer WITH to the
1277        // primary insert's getCteList(); sub-INSERTs in
1278        // getMultiInsertStatements() carry null cteLists. The AST handoff
1279        // mirrors the slice-104 single-target pattern but only nulls
1280        // insert.getCteList() — there is no source SELECT to move it onto
1281        // because each sub-INSERT has its own.
1282        TCTEList outerCtes = insert.getCteList();
1283        Map<String, Integer> cteMap = new HashMap<>();
1284        Map<String, List<String>> publishedMap = new HashMap<>();
1285        boolean handoffApplied = false;
1286        if (outerCtes != null && outerCtes.size() > 0) {
1287            insert.setCteList(null);
1288            handoffApplied = true;
1289            try {
1290                buildSelectCteList(outerCtes, effectiveProvider, out, outLineage,
1291                        cteMap, publishedMap,
1292                        /*allowShadowOverride=*/ false,
1293                        /*additionalAllCteNames=*/ null);
1294            } catch (RuntimeException ex) {
1295                // Restore eagerly on CTE-build failure so a downstream caller
1296                // observing the AST sees the original cteList.
1297                insert.setCteList(outerCtes);
1298                throw ex;
1299            }
1300        }
1301
1302        try {
1303            // Primary INSERT (first target)
1304            appendOneHiveInsert(insert, effectiveProvider, out, outLineage,
1305                    cteMap, publishedMap);
1306
1307            // Additional INSERTs from getMultiInsertStatements()
1308            for (Object miObj : insert.getMultiInsertStatements()) {
1309                appendOneHiveInsert((TInsertSqlStatement) miObj, effectiveProvider,
1310                        out, outLineage, cteMap, publishedMap);
1311            }
1312        } finally {
1313            if (handoffApplied) {
1314                insert.setCteList(outerCtes);
1315            }
1316        }
1317
1318        return new SemanticProgram(out, outLineage);
1319    }
1320
1321    /**
1322     * Slice 93 — true when every INSERT-SELECT pair in a Hive multi-insert
1323     * block has a single FROM source (i.e., one entry in
1324     * {@code subQuery.getTables()}). Guards the source-table fallback so
1325     * Phase 1's heuristic source assignment is only trusted in contexts
1326     * where it is provably unambiguous (round-2 codex Q1 BLOCKING).
1327     */
1328    private static boolean isSingleSourceMultiInsert(TInsertSqlStatement insert) {
1329        if (!isSingleSourceSubQuery(insert.getSubQuery())) {
1330            return false;
1331        }
1332        for (Object miObj : insert.getMultiInsertStatements()) {
1333            TInsertSqlStatement mi = (TInsertSqlStatement) miObj;
1334            if (!isSingleSourceSubQuery(mi.getSubQuery())) {
1335                return false;
1336            }
1337        }
1338        return true;
1339    }
1340
1341    private static boolean isSingleSourceSubQuery(TSelectSqlStatement sel) {
1342        return sel != null && sel.getTables() != null && sel.getTables().size() == 1;
1343    }
1344
1345    /**
1346     * Build one INSERT-SELECT pair into {@code out} / {@code outLineage}.
1347     * Called by {@link #buildHiveMultiInsert} for the primary and each
1348     * additional INSERT in a Hive multi-insert block. Each call validates
1349     * the target/source, builds the source SELECT via {@link #build}, and
1350     * delegates the post-build INSERT-graph assembly to
1351     * {@link #assembleInsertGraphAndLineage} so the layout exactly mirrors
1352     * the single-target slice-78 INSERT path (the helper also handles
1353     * inner-lineage rebasing when {@code out} is non-empty).
1354     */
1355    private static void appendOneHiveInsert(TInsertSqlStatement insert,
1356                                            NameBindingProvider provider,
1357                                            List<StatementGraph> out,
1358                                            List<LineageEdge> outLineage,
1359                                            Map<String, Integer> cteMap,
1360                                            Map<String, List<String>> publishedMap) {
1361        TTable targetTable = insert.getTargetTable();
1362        if (targetTable == null || targetTable.getTableName() == null) {
1363            throw new SemanticIRBuildException(Diagnostic.error(
1364                    DiagnosticCode.INSERT_TARGET_MISSING,
1365                    "Hive multi-insert: INSERT has no resolvable target table",
1366                    insert));
1367        }
1368        String targetQName = targetTable.getTableName().toString();
1369        if (targetQName.isEmpty()) {
1370            throw new SemanticIRBuildException(Diagnostic.error(
1371                    DiagnosticCode.INSERT_TARGET_MISSING,
1372                    "Hive multi-insert: INSERT target table name is empty",
1373                    insert));
1374        }
1375        TSelectSqlStatement source = insert.getSubQuery();
1376        if (source == null) {
1377            throw new SemanticIRBuildException(Diagnostic.error(
1378                    DiagnosticCode.INSERT_SOURCE_NOT_SUPPORTED,
1379                    "Hive multi-insert: INSERT has no source SELECT",
1380                    insert));
1381        }
1382
1383        // Slice 109 — when outer CTEs are present (cteMap non-empty), build
1384        // the source SELECT via buildSelectBodyAfterCteWalk directly into
1385        // out/outLineage so it sees the shared cteMap/publishedMap. The
1386        // slice-93 path (no outer CTEs) keeps the build(source, provider) +
1387        // assembleInsertGraphAndLineage flow unchanged.
1388        if (cteMap.isEmpty()) {
1389            SemanticProgram inner = build(source, provider);
1390            // Hive has no RETURNING/OUTPUT — pass null clauses directly.
1391            assembleInsertGraphAndLineage(
1392                    insert, targetTable, targetQName, inner,
1393                    "Hive multi-insert: INSERT",
1394                    /*returningClause=*/ null,
1395                    /*outputClause=*/ null,
1396                    out, outLineage, provider);
1397            return;
1398        }
1399
1400        // Slice 109 — defensive: parser probe shows sub-SELECTs in Hive
1401        // multi-insert do NOT carry their own cteList. If a future parser
1402        // change ever attached one, mixed outer+inner WITH semantics would
1403        // need slice-107/108-style two-pass walker support; until then the
1404        // shape rejects with the existing mixed-WITH code.
1405        if (source.getCteList() != null && source.getCteList().size() > 0) {
1406            throw new SemanticIRBuildException(Diagnostic.error(
1407                    DiagnosticCode.INSERT_MIXED_OUTER_AND_INNER_WITH_NOT_SUPPORTED,
1408                    "Hive multi-insert: mixed outer + inner WITH on a "
1409                            + "sub-SELECT is not supported by "
1410                            + "SemanticIRBuilder.buildHiveMultiInsert; "
1411                            + "slice 109 admits outer-only WITH on multi-insert",
1412                    insert));
1413        }
1414
1415        // Snapshot out.size() so the source SELECT and its inner extractions
1416        // are pinned to known positions. The slice-23 EXISTS-extraction and
1417        // FROM-subquery extraction paths inside buildSelectBodyAfterCteWalk
1418        // append directly to out/outLineage; the source SELECT lands LAST.
1419        int beforeSelectIdx = out.size();
1420        buildSelectBodyAfterCteWalk(source, provider, out, outLineage,
1421                cteMap, publishedMap,
1422                /*hasOuterCteListAlreadyProcessed=*/ true);
1423        if (out.size() <= beforeSelectIdx) {
1424            // Defensive: buildSelectBodyAfterCteWalk always appends at least
1425            // the source SELECT; this branch is unreachable in practice.
1426            throw new SemanticIRBuildException(Diagnostic.error(
1427                    DiagnosticCode.INSERT_SOURCE_NOT_SUPPORTED,
1428                    "Hive multi-insert: INSERT source built no statements",
1429                    insert));
1430        }
1431        int selectIdx = out.size() - 1;
1432        assembleInsertTargetGraphFromAppended(
1433                insert, targetTable, targetQName, selectIdx,
1434                "Hive multi-insert: INSERT",
1435                /*returningClause=*/ null,
1436                /*outputClause=*/ null,
1437                out, outLineage, provider);
1438    }
1439
1440    /**
1441     * Slice 93 — shared INSERT-graph assembly used by both the slice-78
1442     * single-target {@link #buildInsert} and the slice-93 Hive multi-insert
1443     * {@link #appendOneHiveInsert}. Appends {@code inner.getStatements()}
1444     * to {@code out} (rebasing {@code inner.getLineage()}'s STATEMENT_OUTPUT
1445     * indices when {@code out} is non-empty), then appends an INSERT-kind
1446     * {@link StatementGraph} and per-source-output cross-statement
1447     * {@link LineageEdge}s.
1448     *
1449     * <p>Discriminators between the two callers:
1450     * <ul>
1451     *   <li>{@code diagnosticPrefix} is woven into column-count-mismatch
1452     *       and empty-inner-source error messages so the originating call
1453     *       site is identifiable.</li>
1454     *   <li>{@code returningClause} / {@code outputClause} are passed
1455     *       directly to {@link #buildReturningColumns} (slice 78 supplies
1456     *       the INSERT's RETURNING/OUTPUT clauses; slice 93's Hive path
1457     *       passes {@code null}/{@code null} since Hive has no
1458     *       RETURNING/OUTPUT). Passing the clauses directly keeps the
1459     *       discriminator visible at every call site rather than hidden
1460     *       behind a boolean (round-2 codex Q3 suggestion).</li>
1461     * </ul>
1462     *
1463     * <p>Mutates both {@code out} and {@code outLineage}.
1464     */
1465    private static void assembleInsertGraphAndLineage(
1466            TInsertSqlStatement insert,
1467            TTable targetTable,
1468            String targetQName,
1469            SemanticProgram inner,
1470            String diagnosticPrefix,
1471            TReturningClause returningClause,
1472            TOutputClause outputClause,
1473            List<StatementGraph> out,
1474            List<LineageEdge> outLineage,
1475            NameBindingProvider provider) {
1476        List<StatementGraph> innerStmts = inner.getStatements();
1477        if (innerStmts.isEmpty()) {
1478            // Defensive: build() always returns at least one statement when
1479            // it doesn't throw. This branch is unreachable in practice but
1480            // surfaces a structured diagnostic instead of an
1481            // IndexOutOfBoundsException on the sourceOuter access below.
1482            throw new SemanticIRBuildException(Diagnostic.error(
1483                    DiagnosticCode.INSERT_SOURCE_NOT_SUPPORTED,
1484                    diagnosticPrefix + " source built no statements",
1485                    insert));
1486        }
1487
1488        // Rebase inner lineage edges by the current out.size() offset
1489        // (round-2 codex Q4 BLOCKING). For the slice-78 single-target
1490        // path out is empty (offset=0) so rebase is a no-op; for the
1491        // slice-93 Hive path each subsequent INSERT-SELECT pair adds
1492        // an offset matching the absolute position of its inner block.
1493        int offset = out.size();
1494        int selectIdx = offset + innerStmts.size() - 1;
1495        out.addAll(innerStmts);
1496        for (LineageEdge e : inner.getLineage()) {
1497            outLineage.add(rebaseLineageEdge(e, offset));
1498        }
1499
1500        StatementGraph sourceOuter = innerStmts.get(innerStmts.size() - 1);
1501        List<OutputColumn> sourceOutputs = sourceOuter.getOutputColumns();
1502        int sourceOutCount = sourceOutputs.size();
1503
1504        // Optional explicit INSERT column list. Verbatim bare-name
1505        // spelling per slice-78 contract; arity mismatch rejects.
1506        TObjectNameList colList = insert.getColumnList();
1507        List<String> targetColumnNames = new ArrayList<>();
1508        if (colList != null && colList.size() > 0) {
1509            for (int i = 0; i < colList.size(); i++) {
1510                TObjectName n = colList.getObjectName(i);
1511                targetColumnNames.add(n == null ? "" : n.toString());
1512            }
1513            if (targetColumnNames.size() != sourceOutCount) {
1514                throw new SemanticIRBuildException(Diagnostic.error(
1515                        DiagnosticCode.INSERT_COLUMN_COUNT_MISMATCH,
1516                        diagnosticPrefix + " column list has "
1517                                + targetColumnNames.size()
1518                                + " column(s) but source SELECT produced "
1519                                + sourceOutCount + " output(s)",
1520                        insert));
1521            }
1522        }
1523
1524        // INSERT StatementGraph — slice-78 single-target shape with the
1525        // source SELECT as a SUBQUERY-kind relation entry.
1526        String sourceName = sourceOuter.getName();
1527        String sourceRelAlias = (sourceName != null && !sourceName.isEmpty())
1528                ? sourceName : "__insert_source__";
1529        RelationBinding sourceBinding = new RelationBinding(
1530                RelationKind.SUBQUERY, sourceRelAlias);
1531        List<RelationSource> insertRelations = new ArrayList<>();
1532        insertRelations.add(new RelationSource(sourceRelAlias, sourceBinding));
1533
1534        RelationBinding targetBinding = new RelationBinding(
1535                RelationKind.TABLE, targetQName);
1536        TargetRelation target = new TargetRelation(targetBinding, targetColumnNames);
1537
1538        int insertIdx = out.size();
1539        String insertTargetAlias = effectiveAliasOf(targetTable);
1540        if (insertTargetAlias == null || insertTargetAlias.isEmpty()) {
1541            insertTargetAlias = targetQName;
1542        }
1543        // Slice 85: RETURNING/OUTPUT projections. Clauses are passed
1544        // through directly from the call site (slice-78 single-target
1545        // forwards the INSERT's own clauses; slice-93 Hive multi-insert
1546        // forwards null/null since Hive has no RETURNING/OUTPUT).
1547        List<OutputColumn> returningCols = buildReturningColumns(
1548                returningClause,
1549                outputClause,
1550                "INSERT",
1551                targetQName,
1552                insertTargetAlias,
1553                targetTable,
1554                /*fromSideRelations=*/ Collections.<RelationSource>emptyList(),
1555                provider,
1556                insertIdx,
1557                outLineage,
1558                insert);
1559
1560        StatementGraph insertOuter = new StatementGraph(
1561                /*name=*/ null,
1562                "INSERT",
1563                insertRelations,
1564                /*outputColumns=*/ Collections.<OutputColumn>emptyList(),
1565                returningCols,
1566                /*filterColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1567                /*joinColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1568                /*groupByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1569                /*havingColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1570                /*orderByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1571                /*distinctOnColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1572                /*distinct=*/ false,
1573                /*setOperator=*/ null,
1574                /*rowLimit=*/ null,
1575                target);
1576        out.add(insertOuter);
1577
1578        // Cross-statement lineage: target.col_i ← STATEMENT_OUTPUT(selectIdx, srcName_i)
1579        for (int i = 0; i < sourceOutCount; i++) {
1580            String srcName = sourceOutputs.get(i).getName();
1581            String tgtName = (i < targetColumnNames.size())
1582                    ? targetColumnNames.get(i) : srcName;
1583            if (tgtName == null || tgtName.isEmpty()) {
1584                continue;
1585            }
1586            outLineage.add(new LineageEdge(
1587                    LineageRef.tableColumn(targetQName, tgtName),
1588                    LineageRef.statementOutput(selectIdx, srcName)));
1589        }
1590    }
1591
1592    /**
1593     * Slice 109 — assemble the INSERT-target half (TargetRelation, INSERT
1594     * StatementGraph, RETURNING/OUTPUT projections, and cross-statement
1595     * lineage edges) when the source SELECT and its inner extractions have
1596     * ALREADY been appended directly to {@code out}/{@code outLineage} by
1597     * {@link #buildSelectBodyAfterCteWalk}. The slice-93
1598     * {@link #assembleInsertGraphAndLineage} helper, by contrast, takes a
1599     * pre-built {@link SemanticProgram} and rebases STATEMENT_OUTPUT
1600     * indices on the way in — that path is unused here because the source
1601     * SELECT was already built into absolute positions in {@code out}.
1602     *
1603     * <p>{@code selectIdx} must be the position of the source SELECT in
1604     * {@code out} (last statement appended by the caller before this helper
1605     * runs). RETURNING/OUTPUT clauses are passed directly (Hive multi-
1606     * insert callers pass {@code null}/{@code null}); other DMLs that
1607     * adopt this helper later can forward their own.
1608     */
1609    private static void assembleInsertTargetGraphFromAppended(
1610            TInsertSqlStatement insert,
1611            TTable targetTable,
1612            String targetQName,
1613            int selectIdx,
1614            String diagnosticPrefix,
1615            TReturningClause returningClause,
1616            TOutputClause outputClause,
1617            List<StatementGraph> out,
1618            List<LineageEdge> outLineage,
1619            NameBindingProvider provider) {
1620        StatementGraph sourceOuter = out.get(selectIdx);
1621        List<OutputColumn> sourceOutputs = sourceOuter.getOutputColumns();
1622        int sourceOutCount = sourceOutputs.size();
1623
1624        TObjectNameList colList = insert.getColumnList();
1625        List<String> targetColumnNames = new ArrayList<>();
1626        if (colList != null && colList.size() > 0) {
1627            for (int i = 0; i < colList.size(); i++) {
1628                TObjectName n = colList.getObjectName(i);
1629                targetColumnNames.add(n == null ? "" : n.toString());
1630            }
1631            if (targetColumnNames.size() != sourceOutCount) {
1632                throw new SemanticIRBuildException(Diagnostic.error(
1633                        DiagnosticCode.INSERT_COLUMN_COUNT_MISMATCH,
1634                        diagnosticPrefix + " column list has "
1635                                + targetColumnNames.size()
1636                                + " column(s) but source SELECT produced "
1637                                + sourceOutCount + " output(s)",
1638                        insert));
1639            }
1640        }
1641
1642        String sourceName = sourceOuter.getName();
1643        String sourceRelAlias = (sourceName != null && !sourceName.isEmpty())
1644                ? sourceName : "__insert_source__";
1645        RelationBinding sourceBinding = new RelationBinding(
1646                RelationKind.SUBQUERY, sourceRelAlias);
1647        List<RelationSource> insertRelations = new ArrayList<>();
1648        insertRelations.add(new RelationSource(sourceRelAlias, sourceBinding));
1649
1650        RelationBinding targetBinding = new RelationBinding(
1651                RelationKind.TABLE, targetQName);
1652        TargetRelation target = new TargetRelation(targetBinding, targetColumnNames);
1653
1654        int insertIdx = out.size();
1655        String insertTargetAlias = effectiveAliasOf(targetTable);
1656        if (insertTargetAlias == null || insertTargetAlias.isEmpty()) {
1657            insertTargetAlias = targetQName;
1658        }
1659        List<OutputColumn> returningCols = buildReturningColumns(
1660                returningClause,
1661                outputClause,
1662                "INSERT",
1663                targetQName,
1664                insertTargetAlias,
1665                targetTable,
1666                /*fromSideRelations=*/ Collections.<RelationSource>emptyList(),
1667                provider,
1668                insertIdx,
1669                outLineage,
1670                insert);
1671
1672        StatementGraph insertOuter = new StatementGraph(
1673                /*name=*/ null,
1674                "INSERT",
1675                insertRelations,
1676                /*outputColumns=*/ Collections.<OutputColumn>emptyList(),
1677                returningCols,
1678                /*filterColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1679                /*joinColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1680                /*groupByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1681                /*havingColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1682                /*orderByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1683                /*distinctOnColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1684                /*distinct=*/ false,
1685                /*setOperator=*/ null,
1686                /*rowLimit=*/ null,
1687                target);
1688        out.add(insertOuter);
1689
1690        for (int i = 0; i < sourceOutCount; i++) {
1691            String srcName = sourceOutputs.get(i).getName();
1692            String tgtName = (i < targetColumnNames.size())
1693                    ? targetColumnNames.get(i) : srcName;
1694            if (tgtName == null || tgtName.isEmpty()) {
1695                continue;
1696            }
1697            outLineage.add(new LineageEdge(
1698                    LineageRef.tableColumn(targetQName, tgtName),
1699                    LineageRef.statementOutput(selectIdx, srcName)));
1700        }
1701    }
1702
1703    /**
1704     * Slice 93 — rebase a {@link LineageEdge}'s {@code STATEMENT_OUTPUT}
1705     * statement indices by {@code offset}. {@code TABLE_COLUMN} refs are
1706     * returned unchanged. Used to concatenate inner {@link SemanticProgram}s
1707     * into a larger one (Hive multi-insert: each INSERT-SELECT pair's inner
1708     * program contributes its own block of statements).
1709     */
1710    private static LineageEdge rebaseLineageEdge(LineageEdge e, int offset) {
1711        if (offset == 0) {
1712            return e;
1713        }
1714        LineageRef from = rebaseLineageRef(e.getFrom(), offset);
1715        LineageRef to = rebaseLineageRef(e.getTo(), offset);
1716        if (from == e.getFrom() && to == e.getTo()) {
1717            return e;
1718        }
1719        return new LineageEdge(from, to);
1720    }
1721
1722    private static LineageRef rebaseLineageRef(LineageRef ref, int offset) {
1723        if (ref == null) {
1724            return null;
1725        }
1726        if (ref.getKind() != LineageRef.Kind.STATEMENT_OUTPUT) {
1727            return ref;
1728        }
1729        return LineageRef.statementOutput(
1730                ref.getStatementIndex() + offset, ref.getOutputName());
1731    }
1732
1733    /**
1734     * Slice 79 — admit a single {@code CREATE TABLE target [(c1, ...)] AS
1735     * SELECT ...} (CTAS) statement. Builds the source SELECT via
1736     * {@link #build} unchanged, then appends a {@code "CREATE_TABLE"}-
1737     * kind {@link StatementGraph} carrying the target relation and
1738     * cross-statement lineage edges (mirrors slice-78 INSERT).
1739     *
1740     * <p>Admitted shape: {@code CREATE [OR REPLACE] TABLE target
1741     * [(c1, c2, ...)] AS <subquery-SELECT>}. Plain
1742     * {@code CREATE TABLE target (a INT, b VARCHAR)} (column DDL with
1743     * no AS SELECT) is rejected via
1744     * {@link DiagnosticCode#CREATE_AS_NO_SOURCE_SELECT}. Explicit
1745     * column-list arity mismatch surfaces as
1746     * {@link DiagnosticCode#CREATE_AS_COLUMN_COUNT_MISMATCH}; a
1747     * missing / empty target name surfaces (defensively) as
1748     * {@link DiagnosticCode#CREATE_AS_TARGET_MISSING}.
1749     *
1750     * <p>For CTAS the explicit column-list spellings come from
1751     * {@link TCreateTableSqlStatement#getColumnList()} — only the bare
1752     * column name from each {@link TColumnDefinition} is consumed;
1753     * data-type tokens are ignored by slice 79.
1754     */
1755    public static SemanticProgram buildCreateTable(TCreateTableSqlStatement create,
1756                                                   NameBindingProvider provider) {
1757        if (create == null) {
1758            throw new IllegalArgumentException("create must not be null");
1759        }
1760        if (provider == null) {
1761            throw new IllegalArgumentException("provider must not be null");
1762        }
1763
1764        // Target name extraction. CTAS exposes the target via the
1765        // TCustomSqlStatement-inherited getTargetTable(); the explicit
1766        // getTableName() is a thin wrapper around tables[0].getTableName()
1767        // and also works. Use getTableName() for symmetry with the
1768        // slice-78 INSERT path.
1769        TObjectName targetName = create.getTableName();
1770        if (targetName == null) {
1771            throw new SemanticIRBuildException(Diagnostic.error(
1772                    DiagnosticCode.CREATE_AS_TARGET_MISSING,
1773                    "CREATE TABLE has no resolvable target table name",
1774                    create));
1775        }
1776        String targetQName = targetName.toString();
1777        if (targetQName == null || targetQName.isEmpty()) {
1778            throw new SemanticIRBuildException(Diagnostic.error(
1779                    DiagnosticCode.CREATE_AS_TARGET_MISSING,
1780                    "CREATE TABLE target table name is empty",
1781                    create));
1782        }
1783
1784        TSelectSqlStatement source = create.getSubQuery();
1785        if (source == null) {
1786            throw new SemanticIRBuildException(Diagnostic.error(
1787                    DiagnosticCode.CREATE_AS_NO_SOURCE_SELECT,
1788                    "CREATE TABLE has no AS SELECT subquery; slice 79 admits "
1789                            + "CTAS (CREATE TABLE <target> [(c1, ...)] AS SELECT ...) only",
1790                    create));
1791        }
1792
1793        // Pull explicit column-list spellings BEFORE building the inner
1794        // — keeps the error path cheap for the structural-invalid case
1795        // (CTAS with column count mismatch is detected after the inner
1796        // build because we don't know the source output count yet).
1797        List<String> targetColumnNames = new ArrayList<>();
1798        TColumnDefinitionList colList = create.getColumnList();
1799        if (colList != null && colList.size() > 0) {
1800            for (int i = 0; i < colList.size(); i++) {
1801                TColumnDefinition cd = colList.getColumn(i);
1802                TObjectName n = (cd == null) ? null : cd.getColumnName();
1803                String spelling = (n == null) ? "" : n.toString();
1804                targetColumnNames.add(spelling);
1805            }
1806        }
1807
1808        return assembleCreateLikeProgram(create, source, provider,
1809                "CREATE_TABLE", targetQName, targetColumnNames);
1810    }
1811
1812    /**
1813     * Slice 79 — admit a single
1814     * {@code CREATE [OR REPLACE] VIEW v [(c1, ...)] AS SELECT ...}
1815     * statement. Mirrors {@link #buildCreateTable} except the source
1816     * SELECT is fetched via {@link TCreateViewSqlStatement#getSubquery()}
1817     * (lowercase 'q'), the target name from
1818     * {@link TCreateViewSqlStatement#getViewName()}, and the explicit
1819     * column-list spellings from {@link TViewAliasClause} on the AST.
1820     */
1821    public static SemanticProgram buildCreateView(TCreateViewSqlStatement create,
1822                                                  NameBindingProvider provider) {
1823        if (create == null) {
1824            throw new IllegalArgumentException("create must not be null");
1825        }
1826        if (provider == null) {
1827            throw new IllegalArgumentException("provider must not be null");
1828        }
1829
1830        TObjectName viewName = create.getViewName();
1831        if (viewName == null) {
1832            throw new SemanticIRBuildException(Diagnostic.error(
1833                    DiagnosticCode.CREATE_AS_TARGET_MISSING,
1834                    "CREATE VIEW has no resolvable view name",
1835                    create));
1836        }
1837        String targetQName = viewName.toString();
1838        if (targetQName == null || targetQName.isEmpty()) {
1839            throw new SemanticIRBuildException(Diagnostic.error(
1840                    DiagnosticCode.CREATE_AS_TARGET_MISSING,
1841                    "CREATE VIEW target view name is empty",
1842                    create));
1843        }
1844
1845        TSelectSqlStatement source = create.getSubquery();
1846        if (source == null) {
1847            throw new SemanticIRBuildException(Diagnostic.error(
1848                    DiagnosticCode.CREATE_AS_NO_SOURCE_SELECT,
1849                    "CREATE VIEW has no AS SELECT subquery; slice 79 admits "
1850                            + "CREATE VIEW <target> [(c1, ...)] AS SELECT ... only",
1851                    create));
1852        }
1853
1854        // View-side explicit column aliases via viewAliasClause. Items
1855        // whose alias is null are preserved as empty-string entries so
1856        // a parser-quirk gap doesn't silently collapse the list and
1857        // shift later aliases onto wrong source-output positions —
1858        // count-mismatch detection downstream stays accurate
1859        // (codex diff-review round 1 P2 catch).
1860        List<String> targetColumnNames = new ArrayList<>();
1861        TViewAliasClause aliasClause = create.getViewAliasClause();
1862        if (aliasClause != null) {
1863            TViewAliasItemList items = aliasClause.getViewAliasItemList();
1864            if (items != null) {
1865                for (int i = 0; i < items.size(); i++) {
1866                    TViewAliasItem item = items.getViewAliasItem(i);
1867                    TObjectName alias = (item == null) ? null : item.getAlias();
1868                    String spelling = (alias == null) ? "" : alias.toString();
1869                    targetColumnNames.add(spelling);
1870                }
1871            }
1872        }
1873
1874        return assembleCreateLikeProgram(create, source, provider,
1875                "CREATE_VIEW", targetQName, targetColumnNames);
1876    }
1877
1878    /**
1879     * Shared assembly path for slice-79 CTAS / CREATE VIEW. Given a
1880     * pre-validated target name and the (possibly empty) list of
1881     * explicit column-list spellings, builds the source SELECT,
1882     * validates column-list arity, and emits the outer
1883     * StatementGraph + cross-stmt lineage edges. Mirrors the
1884     * post-source half of slice-78 {@link #buildInsert}.
1885     */
1886    private static SemanticProgram assembleCreateLikeProgram(
1887            TParseTreeNode anchor, TSelectSqlStatement source,
1888            NameBindingProvider provider, String outerKind,
1889            String targetQName, List<String> targetColumnNames) {
1890        SemanticProgram inner = build(source, provider);
1891        List<StatementGraph> innerStmts = inner.getStatements();
1892        if (innerStmts.isEmpty()) {
1893            throw new SemanticIRBuildException(Diagnostic.error(
1894                    DiagnosticCode.CREATE_AS_NO_SOURCE_SELECT,
1895                    "CREATE source built no statements",
1896                    anchor));
1897        }
1898        int selectIdx = innerStmts.size() - 1;
1899        StatementGraph sourceOuter = innerStmts.get(selectIdx);
1900        List<OutputColumn> sourceOutputs = sourceOuter.getOutputColumns();
1901        int sourceOutCount = sourceOutputs.size();
1902
1903        if (!targetColumnNames.isEmpty()
1904                && targetColumnNames.size() != sourceOutCount) {
1905            throw new SemanticIRBuildException(Diagnostic.error(
1906                    DiagnosticCode.CREATE_AS_COLUMN_COUNT_MISMATCH,
1907                    outerKind.equals("CREATE_TABLE")
1908                            ? ("CREATE TABLE column list has " + targetColumnNames.size()
1909                                    + " column(s) but source SELECT produced "
1910                                    + sourceOutCount + " output(s)")
1911                            : ("CREATE VIEW alias list has " + targetColumnNames.size()
1912                                    + " column(s) but source SELECT produced "
1913                                    + sourceOutCount + " output(s)"),
1914                    anchor));
1915        }
1916
1917        String sourceName = sourceOuter.getName();
1918        String sourceRelAlias = (sourceName != null && !sourceName.isEmpty())
1919                ? sourceName : "__create_source__";
1920        RelationBinding sourceBinding = new RelationBinding(
1921                RelationKind.SUBQUERY, sourceRelAlias);
1922        List<RelationSource> createRelations = new ArrayList<>();
1923        createRelations.add(new RelationSource(sourceRelAlias, sourceBinding));
1924
1925        RelationBinding targetBinding = new RelationBinding(
1926                RelationKind.TABLE, targetQName);
1927        TargetRelation target = new TargetRelation(targetBinding, targetColumnNames);
1928
1929        List<StatementGraph> out = new ArrayList<>(innerStmts.size() + 1);
1930        out.addAll(innerStmts);
1931        List<LineageEdge> outLineage = new ArrayList<>(inner.getLineage());
1932
1933        StatementGraph createOuter = new StatementGraph(
1934                /*name=*/ null,
1935                outerKind,
1936                createRelations,
1937                /*outputColumns=*/ Collections.<OutputColumn>emptyList(),
1938                /*filterColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1939                /*joinColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1940                /*groupByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1941                /*havingColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1942                /*orderByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1943                /*distinctOnColumnRefs=*/ Collections.<ColumnRef>emptyList(),
1944                /*distinct=*/ false,
1945                /*setOperator=*/ null,
1946                /*rowLimit=*/ null,
1947                target);
1948        out.add(createOuter);
1949
1950        for (int i = 0; i < sourceOutCount; i++) {
1951            String srcName = sourceOutputs.get(i).getName();
1952            String tgtName = (i < targetColumnNames.size())
1953                    ? targetColumnNames.get(i) : srcName;
1954            if (tgtName == null || tgtName.isEmpty()) {
1955                continue;
1956            }
1957            outLineage.add(new LineageEdge(
1958                    LineageRef.tableColumn(targetQName, tgtName),
1959                    LineageRef.statementOutput(selectIdx, srcName)));
1960        }
1961
1962        return new SemanticProgram(out, outLineage);
1963    }
1964
1965    /**
1966     * Slice 80 / 82 — admit {@code UPDATE target SET c1 = expr1,
1967     * c2 = expr2, ... [FROM source_list] [WHERE pred]} statements.
1968     * Emits one {@code "UPDATE"}-kind {@link StatementGraph} carrying
1969     * the target relation plus synthetic {@link OutputColumn} entries
1970     * per SET assignment (output name = SET LHS verbatim spelling;
1971     * sources = column refs collected from the RHS expression).
1972     * Optional WHERE refs surface on
1973     * {@link StatementGraph#getFilterColumnRefs()}.
1974     *
1975     * <p>Slice 82 lifts the slice-80 {@code UPDATE_JOINED_NOT_SUPPORTED}
1976     * reject for the common PG / MSSQL / BigQuery / Snowflake / Redshift
1977     * FROM-side joined UPDATE shapes. The IR shape gains two slots:
1978     * {@code relations[]} now carries TABLE-kind RelationSources for
1979     * FROM-side sources (slice 80 left empty), and
1980     * {@code joinColumnRefs[]} now carries ON-clause column refs from
1981     * FROM-side JOINs. The target stays on
1982     * {@link StatementGraph#getTarget()}; a reference-identity filter
1983     * excludes the target's own TTable instance from {@code relations[]}.
1984     *
1985     * <p>Admitted shape:
1986     * <ul>
1987     *   <li>Single-target UPDATE without FROM (slice 80) —
1988     *       {@code relations[]} stays empty.</li>
1989     *   <li>PG / BQ / SF / RS {@code UPDATE t SET ... FROM source}
1990     *       (single FROM source).</li>
1991     *   <li>PG / BQ {@code UPDATE t SET ... FROM s1, s2, ...}
1992     *       (comma-FROM list).</li>
1993     *   <li>PG / MSSQL {@code UPDATE t SET ... FROM s1 [INNER|LEFT|RIGHT|FULL OUTER] JOIN s2 ON ...}
1994     *       — ON refs populate {@code joinColumnRefs[]}.</li>
1995     *   <li>MSSQL {@code UPDATE t SET ... FROM t INNER JOIN s ON ...}
1996     *       — target may appear in FROM; reference-identity filter
1997     *       excludes the target's own TTable instance from
1998     *       {@code relations[]}.</li>
1999     *   <li>Explicit {@code CROSS JOIN} (no ON; semantically equivalent
2000     *       to comma-FROM).</li>
2001     *   <li>SET LHS is a {@link EExpressionType#simple_object_name_t}
2002     *       column reference (qualified {@code t.x} or bare {@code x}).
2003     *       Oracle tuple {@code SET (a, b) = (...)} (LHS = list_t)
2004     *       rejects via
2005     *       {@link DiagnosticCode#UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED}.</li>
2006     *   <li>SET RHS may be any expression NOT containing a scalar
2007     *       subquery and NOT containing a window function. Subqueries
2008     *       reject via
2009     *       {@link DiagnosticCode#UPDATE_SET_HAS_SUBQUERY_NOT_SUPPORTED};
2010     *       window functions reuse the existing
2011     *       {@link DiagnosticCode#CLAUSE_WINDOW_FUNCTION_LEAK}
2012     *       routed through {@link #rejectWindowFunctionInScope}.</li>
2013     *   <li>Optional WHERE clause — existing WHERE-side rejects
2014     *       (subqueries, window functions) continue to apply via the
2015     *       shared {@link #containsAnySubquery} +
2016     *       {@code rejectWindowFunctionInScope} helpers used by SELECT
2017     *       WHERE.</li>
2018     * </ul>
2019     *
2020     * <p>Slice 82 reject scope, with slice 83 admitting subquery FROM
2021     * sources (the slice-82 {@code UPDATE_FROM_SUBQUERY_NOT_SUPPORTED}
2022     * code stays declared but unreached — slice-71/72
2023     * retain-for-documentation precedent):
2024     * <ul>
2025     *   <li>Subquery as a FROM source — slice 83 admits via the
2026     *       SELECT-side {@code processDirectSubqueryTable} extractor,
2027     *       publishing a SUBQUERY-kind {@link RelationSource} and a
2028     *       cross-statement {@link LineageEdge} per subquery-bound
2029     *       output source.</li>
2030     *   <li>USING in any FROM-side join item —
2031     *       {@link DiagnosticCode#UPDATE_FROM_JOIN_USING_NOT_SUPPORTED}.</li>
2032     *   <li>NATURAL JOIN in any FROM-side join item —
2033     *       {@link DiagnosticCode#UPDATE_FROM_JOIN_NATURAL_NOT_SUPPORTED}.</li>
2034     *   <li>Subquery in any ON condition —
2035     *       {@link DiagnosticCode#UPDATE_JOIN_ON_HAS_SUBQUERY_NOT_SUPPORTED}.</li>
2036     *   <li>Window function in any ON condition — reuses
2037     *       {@link DiagnosticCode#CLAUSE_WINDOW_FUNCTION_LEAK} via
2038     *       {@link #rejectWindowFunctionInScope}.</li>
2039     * </ul>
2040     *
     * <p>Deferred (rejected at the outer level before any SET
     * processing):
     * <ul>
     *   <li>{@code OUTPUT ... INTO target} (SQL Server; writes a second
     *       target) →
     *       {@link DiagnosticCode#OUTPUT_INTO_NOT_SUPPORTED}.</li>
     *   <li>ORDER BY / LIMIT on UPDATE (MySQL / Couchbase) →
     *       {@link DiagnosticCode#UPDATE_ORDER_BY_OR_LIMIT_NOT_SUPPORTED}.</li>
     *   <li>Empty / missing SET clause, Couchbase UNSET-only updates →
     *       {@link DiagnosticCode#UPDATE_NO_SET_CLAUSE}.</li>
     *   <li>Missing target table (defensive) →
     *       {@link DiagnosticCode#UPDATE_TARGET_MISSING}.</li>
     * </ul>
     *
     * <p>Formerly deferred, now admitted (the codes stay declared but
     * unreached):
     * <ul>
     *   <li>Top-level WITH on UPDATE — admitted by slice 105;
     *       {@link DiagnosticCode#UPDATE_CTE_NOT_SUPPORTED} is no longer
     *       raised.</li>
     *   <li>RETURNING projection (PG / Oracle) — admitted by slice 85;
     *       {@link DiagnosticCode#UPDATE_RETURNING_CLAUSE_NOT_SUPPORTED}
     *       is no longer raised.</li>
     *   <li>OUTPUT projection (SQL Server) — admitted by slice 85;
     *       {@link DiagnosticCode#UPDATE_OUTPUT_CLAUSE_NOT_SUPPORTED}
     *       is no longer raised.</li>
     * </ul>
2057     *
2058     * <p>Cross-statement {@link LineageEdge}s, one per SET assignment:
2059     * <pre>
2060     *   from = LineageRef.tableColumn(targetQName, target_col_i)
2061     *   to   = LineageRef.statementOutput(0, output_name_i)
2062     * </pre>
2063     * Statement index 0 is the UPDATE statement itself — the synthetic
2064     * output IS the per-assignment "projection" that flows into the
2065     * target column. This is the slice-78 INSERT contract
2066     * (TABLE_COLUMN → STATEMENT_OUTPUT) with the source SELECT replaced
2067     * by the UPDATE's own per-assignment outputs; consumers read
2068     * {@code outputs[i].sources} to enumerate the RHS column refs that
2069     * feed the target column.
2070     */
2071    public static SemanticProgram buildUpdate(TUpdateSqlStatement update,
2072                                              NameBindingProvider provider) {
2073        if (update == null) {
2074            throw new IllegalArgumentException("update must not be null");
2075        }
2076        if (provider == null) {
2077            throw new IllegalArgumentException("provider must not be null");
2078        }
2079
2080        // Slice 86 — defensive UsingScope reset at entry so a parent
2081        // scope cannot leak into UPDATE's binding decisions. Mirrors
2082        // SELECT-side buildSelectStatementImpl (slice 65). The UPDATE's
2083        // own UsingScope is installed after the FROM-join walker (step
2084        // 5.8 below).
2085        provider = provider.withUsingScope(UsingScope.EMPTY);
2086
2087        // 1) Slice 105 — admit top-level WITH on UPDATE. Walks the CTE
2088        // list left-to-right, building each body as a preceding
2089        // StatementGraph and producing cteNameToStatementIndex +
2090        // ctePublishedColumns for the FROM-as-CTE branch in
2091        // buildUpdateRelation below. Mirrors the slice-101 MERGE walker.
2092        // `stmts` / `lineage` allocated here (hoisted from the prior
2093        // slice-83 location) so the CTE walker can append.
2094        // UPDATE_CTE_NOT_SUPPORTED stays declared-but-unreached
2095        // (slice 71/72/82/86/95/96/97/98/99/100/101/102/103/104 precedent).
2096        List<StatementGraph> stmts = new ArrayList<>();
2097        List<LineageEdge> lineage = new ArrayList<>();
2098        Map<String, List<String>> ctePublishedColumns = new LinkedHashMap<>();
2099        Map<String, Integer> cteNameToStatementIndex = buildUpdateCteList(
2100                update, provider, stmts, lineage, ctePublishedColumns);
2101
2102        // 2) Target table — defensive (parser usually rejects first).
2103        TTable targetTable = update.getTargetTable();
2104        if (targetTable == null || targetTable.getTableName() == null) {
2105            throw new SemanticIRBuildException(Diagnostic.error(
2106                    DiagnosticCode.UPDATE_TARGET_MISSING,
2107                    "UPDATE statement has no resolvable target table",
2108                    update));
2109        }
2110        String targetQName = targetTable.getTableName().toString();
2111        if (targetQName == null || targetQName.isEmpty()) {
2112            throw new SemanticIRBuildException(Diagnostic.error(
2113                    DiagnosticCode.UPDATE_TARGET_MISSING,
2114                    "UPDATE target table name is empty",
2115                    update));
2116        }
2117
2118        // 3) Slice 82 — FROM-side joined UPDATE is now admitted. The
2119        // slice-80 UPDATE_JOINED_NOT_SUPPORTED rejects (which fired on
2120        // update.tables.size() > 1 and update.getFromSourceJoin() != null)
2121        // are removed. The shape-specific rejects below (subquery in
2122        // FROM, USING, NATURAL, subquery in ON, window in ON) replace
2123        // them. UPDATE_JOINED_NOT_SUPPORTED remains declared but
2124        // unreached for API stability (the residual join-form-target
2125        // shape `UPDATE (a JOIN b) SET ...` does not parse in any
2126        // supported dialect — verified by AST probe).
2127        //
2128        // Reject ordering within buildUpdate: WITH / target-missing /
2129        // RETURNING / OUTPUT / ORDER BY / LIMIT / SET-empty all run
2130        // before the per-source FROM walk so a single rejection wins
2131        // on multi-violation shapes (e.g. `UPDATE t ... FROM s
2132        // RETURNING ...` rejects RETURNING before the FROM walk).
2133
2134        // 4) Slice 85 lifts the RETURNING / OUTPUT rejects — projections
2135        // are now admitted via {@link #buildReturningColumns} called after
2136        // SET / WHERE / FROM walks complete (the projection expressions
2137        // need the providerWithStar binding constructed in step 5.5).
2138        // The cheap statement-level OUTPUT_INTO reject fires here so a
2139        // multi-violation shape (OUTPUT … INTO target with RETURNING
2140        // content errors) routes to the cheaper structural code first.
2141        // {@code UPDATE_RETURNING_CLAUSE_NOT_SUPPORTED} and
2142        // {@code UPDATE_OUTPUT_CLAUSE_NOT_SUPPORTED} stay declared but
2143        // unreached (slice 71/72 retain-for-documentation precedent).
2144        if (update.getOutputClause() != null
2145                && update.getOutputClause().getIntoTable() != null) {
2146            throw new SemanticIRBuildException(Diagnostic.error(
2147                    DiagnosticCode.OUTPUT_INTO_NOT_SUPPORTED,
2148                    "UPDATE OUTPUT ... INTO <target> writes a second target; "
2149                            + "slice 85 admits projection-only OUTPUT",
2150                    update));
2151        }
2152        if (update.getOrderByClause() != null
2153                || update.getLimitClause() != null) {
2154            throw new SemanticIRBuildException(Diagnostic.error(
2155                    DiagnosticCode.UPDATE_ORDER_BY_OR_LIMIT_NOT_SUPPORTED,
2156                    "UPDATE with ORDER BY / LIMIT (MySQL / Couchbase) is "
2157                            + "not supported by SemanticIRBuilder.buildUpdate; "
2158                            + "slice 80 admits no row-pruning on UPDATE",
2159                    update));
2160        }
2161
2162        // 5) SET / UNSET — slice 80 requires a non-empty SET clause; a
2163        // Couchbase UNSET-only update (UnSetTerms populated, SET empty)
2164        // routes through the same code with discriminating message text.
2165        TResultColumnList sets = update.getResultColumnList();
2166        boolean hasUnSet = update.getUnSetTerms() != null
2167                && update.getUnSetTerms().size() > 0;
2168        if (sets == null || sets.size() == 0) {
2169            String reason = hasUnSet
2170                    ? "UPDATE has only an UNSET clause (Couchbase); slice 80 "
2171                            + "requires a non-empty SET clause"
2172                    : "UPDATE has no SET clause";
2173            throw new SemanticIRBuildException(Diagnostic.error(
2174                    DiagnosticCode.UPDATE_NO_SET_CLAUSE,
2175                    reason,
2176                    update));
2177        }
2178
2179        // 5.5) Slice 83 — extract FROM subqueries as their own
2180        // StatementGraphs (after slice 105's CTE walker so the CTE
2181        // bodies precede any extracted FROM-subquery in the program).
2182        //
2183        // The extractor reuses the SELECT-side
2184        // {@link #processDirectSubqueryTable} verbatim — passing the
2185        // slice-105 cteNameToStatementIndex + ctePublishedColumns so a
2186        // nested SELECT inside a FROM-subquery can still resolve outer
2187        // CTE references through CTEScope (Resolver2 already binds CTE
2188        // refs in UPDATE correctly; the maps are passed for parity with
2189        // the SELECT/MERGE call sites). Inner predicate subqueries in
2190        // WHERE / JOIN ON / GROUP BY are caught by the slice-17 leak
2191        // guard ({@link #rejectSubqueriesInFromSubqueryBodyClauses}).
2192        //
2193        // No snapshot/rollback wrapper here (codex round-1 Q5 NICE):
2194        // buildUpdate owns fresh local stmts/lineage lists and
2195        // propagates exceptions to the caller — no observer can see
2196        // partial mutation.
2197        //
2198        // Slice 110 — decorate `provider` with `withCteContext` BEFORE
2199        // passing it to `extractUpdateFromSubqueries` so a nested SELECT
2200        // inside an extracted FROM-subquery body (e.g.
2201        // `UPDATE t SET col = sub.x FROM (SELECT id, x FROM cte) sub`)
2202        // routes CTE refs through `RelationKind.CTE`. Mirrors the
2203        // slice-106 DELETE-side `providerWithCte` pattern at line ~3205
2204        // (codex round-2 Q2 BLOCKING fix in slice 106). The slice-105
2205        // UPDATE site missed this decoration; slice 110 closes the gap
2206        // here since it also adds the same decoration on the WHERE-side
2207        // predicate-subquery extraction (line ~2370 below).
2208        NameBindingProvider providerWithCte = cteNameToStatementIndex.isEmpty()
2209                ? provider
2210                : provider.withCteContext(cteNameToStatementIndex.keySet());
2211        Map<String, Integer> subqueryAliasToIndex =
2212                extractUpdateFromSubqueries(update, providerWithCte, stmts, lineage,
2213                        cteNameToStatementIndex, ctePublishedColumns);
2214        // Build the in-scope map (subquery-alias → published column
2215        // names, plus CTE-bound alias → CTE published columns) so
2216        // `provider.withInScopeRelationColumns(map)` recognises
2217        // `sub.x` AND `cte.x` for the consuming UPDATE. Base-table
2218        // FROM-side relations don't need an entry; their column
2219        // resolution stays on the Resolver2 catalog path.
2220        Map<String, List<String>> updateInScope = buildUpdateInScopeMap(
2221                update, subqueryAliasToIndex, stmts,
2222                cteNameToStatementIndex, ctePublishedColumns);
2223        // Slice 110 — base `providerWithStar` on `providerWithCte`
2224        // (instead of raw `provider`) so SET RHS / WHERE / RETURNING
2225        // collectors and the slice-86 USING/NATURAL walker all see the
2226        // outer CTE context. Without this, a CTE-bound reference inside
2227        // a JOIN ON expression or a SET RHS scalar would bind as TABLE-
2228        // kind even when the CTE is declared at the UPDATE level.
2229        NameBindingProvider providerWithStar = updateInScope.isEmpty()
2230                ? providerWithCte
2231                : providerWithCte.withInScopeRelationColumns(updateInScope);
2232
2233        // 5.7) Slice 86 — relocated from slice-82 step 8. The FROM-side
2234        // join walker now runs BEFORE SET RHS / WHERE collection so the
2235        // slice-86 UsingScope (step 5.8 below) can be applied to those
2236        // collectors. Slice 65 SELECT-side ordering: buildRelations →
2237        // buildUsingScope → buildOutputColumns / buildFilter / etc.
2238        // The join walker uses `providerWithStar` (inScope only — no
2239        // UsingScope yet) because USING/NATURAL emit joinColumnRefs[]
2240        // directly via emitMergedJoinRefs without consulting UsingScope.
2241        //
2242        // The walker treats `update.getJoins()` as the authoritative
2243        // FROM-list representation:
2244        //  - PG plain `FROM s` →   joins=[{table=s, items=[]}]
2245        //  - PG comma-FROM   →     joins=[{s1, items=[]}, {s2, items=[]}, ...]
2246        //  - PG / MSSQL explicit JOIN → joins=[{driver, items=[item1,...]}]
2247        //  - MSSQL target-in-FROM → joins=[{target_alias, items=[other,...]}]
2248        //
2249        // For each TJoin: the driver table goes through buildUpdateRelation
2250        // (which applies the slice-82 FROM-source rejects + identity
2251        // filter); each JoinItem is walked through buildUpdateJoinItem
2252        // which (slice 86) admits USING / NATURAL via slice-64/65/66
2253        // shared helpers in addition to ON / CROSS.
2254        List<RelationSource> relations = new ArrayList<>();
2255        // Slice 82 codex round-1 Q2 BLOCKING — LinkedHashSet dedup spans
2256        // the whole FROM so a column appearing in two ON clauses
2257        // produces one entry. Slice 86 USING/NATURAL emit refs also flow
2258        // through this dedup.
2259        java.util.LinkedHashSet<ColumnRef> joinRefsSet =
2260                new java.util.LinkedHashSet<>();
2261        for (TJoin join : update.getJoins()) {
2262            TTable leftTable = join.getTable();
2263            buildUpdateRelation(leftTable, targetTable, relations, update,
2264                    cteNameToStatementIndex);
2265            TJoinItemList items = join.getJoinItems();
2266            if (items == null) continue;
2267            // Slice 86 — per top-level TJoin LeftOutputState seeded
2268            // with providerWithStar (codex round-1 B2 BLOCKING: inScope
2269            // installed so extracted FROM-subquery drivers' published
2270            // columns are visible to lookupRelationColumnNames for
2271            // NATURAL inference). Reset between top-level TJoins so
2272            // comma-FROM groups stay independent (matches SELECT-side
2273            // buildRelations slice-66 behavior).
2274            LeftOutputState leftState = new LeftOutputState();
2275            seedLeftOutput(leftState, leftTable, providerWithStar);
2276            for (int i = 0; i < items.size(); i++) {
2277                TJoinItem item = items.getJoinItem(i);
2278                // Slice 86 — extended buildUpdateJoinItem signature
2279                // threads the join context (topJoin / items / itemIndex)
2280                // and LeftOutputState to the USING/NATURAL admit paths
2281                // so they can call the SELECT-side slice-64/65/66
2282                // shared helpers verbatim.
2283                // Slice 105 — threads cteNameToStatementIndex so the
2284                // join walker's per-item buildUpdateRelation call can
2285                // route objectname-typed CTE references to a SUBQUERY-
2286                // kind RelationSource pointing at the CTE statement.
2287                buildUpdateJoinItem(join, items, i, targetTable,
2288                        providerWithStar, relations, joinRefsSet, leftState,
2289                        update, cteNameToStatementIndex);
2290            }
2291        }
2292        List<ColumnRef> joinRefs = new ArrayList<>(joinRefsSet);
2293
2294        // 5.8) Slice 86 — install the UPDATE's own UsingScope on
2295        // providerWithStar AFTER the join walker so SET RHS / WHERE /
2296        // RETURNING refs see merged-key resolution (mirrors SELECT-side
2297        // buildSelectStatementImpl slice 65 ordering). The join walker
2298        // itself emits joinColumnRefs via direct emit-refs helpers, so
2299        // UsingScope is irrelevant to ON refs (matches SELECT-side
2300        // contract).
2301        UsingScope updateUsingScope = buildUpdateUsingScope(update, providerWithStar);
2302        if (!updateUsingScope.isEmpty()) {
2303            providerWithStar = providerWithStar.withUsingScope(updateUsingScope);
2304        }
2305
2306        // 5.9) Slice 115 — extract uncorrelated scalar subqueries on SET
2307        // RHS as their own <scalar_subquery_<idx>> StatementGraphs
2308        // appended to `stmts` BEFORE the UPDATE statement. Mirrors slice
2309        // 11 SELECT-side scalar projection extraction. A SET assignment
2310        // whose RHS is exactly a top-level subquery_t admits as a scalar
2311        // SET RHS: the body is built via buildSelectStatement (with the
2312        // slice-11 scalar-body invariants: allowFromSubqueries=false,
2313        // allowScalarProjectionSubqueries=false, allowWindowProjection=
2314        // false), inner predicate-leak guards run, and the resulting
2315        // ScalarInfo (extracted body index + inner output name) is stored
2316        // for the per-assignment loop and lineage emission below.
2317        //
2318        // Correlated scalar subqueries (whose inner refs would resolve to
2319        // an outer alias such as the UPDATE target or a FROM-side
2320        // relation) STILL reject via the slice-11 promoter called with
2321        // EnclosingScope.empty() — the inner ref's alias does not match
2322        // any local relation and no enclosing scope is provided, so
2323        // promoteCorrelatedRefsToOuterReference throws
2324        // SCALAR_SUBQUERY_UNKNOWN_RELATION_ALIAS. Lifting UPDATE-side
2325        // correlation is a follow-up slice (slice 14 SELECT analogue
2326        // extended to UPDATE).
2327        Map<Integer, List<ScalarInfo>> setRhsScalarInfo =
2328                extractScalarSubqueriesFromUpdateSetRhs(update, providerWithStar,
2329                        stmts, lineage, cteNameToStatementIndex,
2330                        subqueryAliasToIndex);
2331
2332        // 6) Per-assignment processing. Each TResultColumn carries an
2333        // assignment_t TExpression whose leftOperand is the SET LHS
2334        // (target column reference) and whose rightOperand is the value
2335        // expression. We collect:
2336        //   - target column spelling   → TargetRelation.columns[i]
2337        //   - synthetic output name    → outputs[i].name (verbatim LHS
2338        //                                   spelling, mirrors slice-78
2339        //                                   INSERT column-list contract)
2340        //   - RHS source column refs   → outputs[i].sources
2341        List<OutputColumn> outputs = new ArrayList<>();
2342        List<String> targetColumnNames = new ArrayList<>();
2343        for (int i = 0; i < sets.size(); i++) {
2344            TResultColumn rc = sets.getResultColumn(i);
2345            TExpression assignment = (rc == null) ? null : rc.getExpr();
2346            // Defensive: per TUpdateSqlStatement's javadoc each SET term
2347            // is an assignment_t. If the parser produced something else
2348            // (no AST shape observed in the tested corpora) we still
2349            // route through TUPLE_ASSIGNMENT_NOT_SUPPORTED so an
2350            // unexpected shape surfaces a stable diagnostic.
2351            if (assignment == null
2352                    || assignment.getExpressionType() != EExpressionType.assignment_t) {
2353                throw new SemanticIRBuildException(Diagnostic.error(
2354                        DiagnosticCode.UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED,
2355                        "UPDATE SET assignment #" + (i + 1) + " is not a "
2356                                + "simple column-value assignment_t; slice 80 "
2357                                + "admits target_col = expr assignments only",
2358                        update));
2359            }
2360            TExpression lhs = assignment.getLeftOperand();
2361            TExpression rhs = assignment.getRightOperand();
2362            if (lhs == null || rhs == null) {
2363                throw new SemanticIRBuildException(Diagnostic.error(
2364                        DiagnosticCode.UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED,
2365                        "UPDATE SET assignment #" + (i + 1)
2366                                + " is missing an operand",
2367                        update));
2368            }
2369            // Tuple LHS (Oracle) - SET (a, b) = (SELECT c1, c2 FROM ...)
2370            // surfaces as list_t. Reject before any subquery-on-RHS
2371            // walk so the diagnostic clearly identifies the tuple shape.
2372            if (lhs.getExpressionType() == EExpressionType.list_t) {
2373                throw new SemanticIRBuildException(Diagnostic.error(
2374                        DiagnosticCode.UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED,
2375                        "UPDATE SET tuple assignment '(a, b) = ...' is not "
2376                                + "supported by SemanticIRBuilder.buildUpdate; "
2377                                + "slice 80 admits target_col = expr only",
2378                        update));
2379            }
2380            if (lhs.getExpressionType() != EExpressionType.simple_object_name_t) {
2381                throw new SemanticIRBuildException(Diagnostic.error(
2382                        DiagnosticCode.UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED,
2383                        "UPDATE SET assignment #" + (i + 1) + " LHS is "
2384                                + "expressionType=" + lhs.getExpressionType()
2385                                + "; slice 80 admits simple column references only",
2386                        update));
2387            }
2388            TObjectName targetCol = lhs.getObjectOperand();
2389            if (targetCol == null) {
2390                throw new SemanticIRBuildException(Diagnostic.error(
2391                        DiagnosticCode.UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED,
2392                        "UPDATE SET assignment #" + (i + 1) + " LHS has no "
2393                                + "TObjectName operand",
2394                        update));
2395            }
2396            String colSpelling = targetCol.toString();
2397
2398            // Slice 115 — top-level subquery_t SET RHS already extracted
2399            // in step 5.9 as a <scalar_subquery_<idx>> StatementGraph.
2400            // OutputColumn carries empty sources; slice-115/119 cross-stmt
2401            // edge below wires the consumer to the extracted body.
2402            if (rhs.getExpressionType() == EExpressionType.subquery_t) {
2403                targetColumnNames.add(colSpelling);
2404                outputs.add(new OutputColumn(colSpelling,
2405                        /*derived=*/ true, /*aggregate=*/ false,
2406                        Collections.<ColumnRef>emptyList()));
2407                continue;
2408            }
2409            // Slice 119 — mixed-expression scalar subquery path: subquery
2410            // nested inside a compound RHS (e.g. `SET col = (SELECT...) + 1`).
2411            // The scalar(s) were already extracted in step 5.9; collect only
2412            // the non-subquery column refs by skipping extracted subq nodes.
2413            if (containsAnySubqueryExpression(rhs)) {
2414                List<TExpression> subqRootsList =
2415                        collectNestedSubqueryExpressions(rhs);
2416                if (subqRootsList.isEmpty()) {
2417                    // P2-1 codex-review: containsAnySubqueryExpression
2418                    // returned true (via getSubQuery() != null) but no
2419                    // subquery_t nodes were found by acceptChildren
2420                    // traversal (e.g. EXISTS or non-scalar predicate
2421                    // subquery). Preserve the original reject so lineage
2422                    // is not silently dropped.
2423                    throw new SemanticIRBuildException(Diagnostic.error(
2424                            DiagnosticCode.UPDATE_SET_HAS_SUBQUERY_NOT_SUPPORTED,
2425                            "UPDATE SET assignment #" + (i + 1) + " right-hand "
2426                                    + "side contains a non-scalar subquery "
2427                                    + "(slice 119 admits only scalar subquery_t "
2428                                    + "inside compound expressions)",
2429                            update));
2430                }
2431                Set<TExpression> subqRoots = Collections.newSetFromMap(
2432                        new IdentityHashMap<TExpression, Boolean>());
2433                subqRoots.addAll(subqRootsList);
2434                // P2-2 codex-review: window functions in the non-subquery
2435                // part of a compound RHS are still illegal. Use the
2436                // skipping variant so scalar body contents are not scanned
2437                // (window functions inside a scalar SELECT are legitimate).
2438                rejectWindowFunctionInScopeSkipping(rhs, "UPDATE SET RHS",
2439                        subqRoots);
2440                List<ColumnRef> sources = collectColumnRefsSkipping(
2441                        rhs, providerWithStar, subqRoots);
2442                targetColumnNames.add(colSpelling);
2443                outputs.add(new OutputColumn(colSpelling,
2444                        /*derived=*/ true, /*aggregate=*/ false, sources));
2445                continue;
2446            }
2447            // Window function on RHS — reuse the existing scope reject.
2448            rejectWindowFunctionInScope(rhs, "UPDATE SET RHS");
2449
2450            // Collect physical column refs from the RHS expression.
2451            List<ColumnRef> sources = collectColumnRefs(rhs, providerWithStar);
2452            boolean derived =
2453                    rhs.getExpressionType() != EExpressionType.simple_object_name_t;
2454            targetColumnNames.add(colSpelling);
2455            outputs.add(new OutputColumn(colSpelling,
2456                    derived, /*aggregate=*/ false, sources));
2457        }
2458
2459        // 7) WHERE refs — slice 110 lifts the slice-80 blanket subquery
2460        // reject by routing uncorrelated predicate-subquery wrappers
2461        // (IN-SELECT / EXISTS / NOT EXISTS / scalar comparison /
2462        // ANY-ALL-SOME) through the slice-23+ JOIN-ON extraction pipeline
2463        // refactored to take a PredicateClauseContext. Each extracted
2464        // wrapper lands as its own <predicate_subquery_<i>> StatementGraph
2465        // BEFORE the UPDATE statement (so updateIdx already accounts for
2466        // them via stmts.size() below). Remaining non-subquery refs flow
        // into filterColumnRefs via collectColumnRefsSkipping. SET-RHS
        // subqueries are not routed through this pipeline (slice-110
        // scope excludes SET RHS); scalar SET-RHS subqueries are instead
        // extracted in step 5.9 (slices 115/119).
2469        // Window functions in non-subquery subtrees still reject via
2470        // the existing rejectWindowFunctionInScopeSkipping helper.
2471        List<ColumnRef> filterRefs;
2472        TWhereClause where = update.getWhereClause();
2473        if (where == null || where.getCondition() == null) {
2474            filterRefs = Collections.<ColumnRef>emptyList();
2475        } else {
2476            Set<TExpression> extractedWhereRoots =
2477                    Collections.<TExpression>emptySet();
2478            if (containsAnySubquery(where)) {
2479                // Slice 110 — `providerWithStar` already carries
2480                // `withCteContext(cteNameToStatementIndex.keySet())`
2481                // (applied at the providerWithCte → providerWithStar
2482                // chain above) so the predicate body's inner SELECT's
2483                // `FROM cte` refs route through `RelationKind.CTE`.
2484                // Without that, emitLineageForStatement would lose the
2485                // STATEMENT_OUTPUT → STATEMENT_OUTPUT edge to the CTE
2486                // body.
2487                extractedWhereRoots =
2488                        extractUncorrelatedPredicateSubqueriesFromClause(
2489                                where.getCondition(), providerWithStar,
2490                                stmts, lineage, cteNameToStatementIndex,
2491                                PredicateClauseContext.UPDATE_WHERE);
2492                rejectAnyRemainingSubqueriesFromClause(
2493                        where.getCondition(), extractedWhereRoots,
2494                        PredicateClauseContext.UPDATE_WHERE);
2495            }
2496            rejectWindowFunctionInScopeSkipping(where, "WHERE clause",
2497                    extractedWhereRoots);
2498            // Slice 83 — providerWithStar so WHERE refs against
2499            // extracted subquery aliases bind correctly. Slice 110 —
2500            // skip extracted predicate-subquery subtrees so inner refs
2501            // do not leak into outer filterColumnRefs (mirrors the
2502            // slice-23 JOIN-ON ref collector).
2503            filterRefs = collectColumnRefsSkipping(where, providerWithStar,
2504                    extractedWhereRoots);
2505        }
2506
2507        // (Step 8 of slice 82 was relocated to step 5.7 by slice 86 so
2508        // the slice-86 UsingScope built at step 5.8 can apply to the
2509        // SET RHS / WHERE collectors above. The walker logic itself is
2510        // unchanged from slice 82's contract — only its position moved.)
2511
2512        RelationBinding targetBinding = new RelationBinding(
2513                RelationKind.TABLE, targetQName);
2514        TargetRelation target = new TargetRelation(targetBinding, targetColumnNames);
2515
2516        // Slice 85 — build RETURNING / OUTPUT projection columns BEFORE
2517        // the StatementGraph so the new returningColumns slot can be
2518        // populated. updateIdx is computed first (deterministic — the
2519        // DML's position is stmts.size() at the moment of the
2520        // upcoming stmts.add(updateStmt)). LineageEdges are emitted
2521        // here via the shared helper (consumer ← producer).
2522        int updateIdx = stmts.size();
2523        // UPDATE target alias = effective alias from the target's
2524        // TTable (slice-82 / slice-83 use the same convention for
2525        // FROM-side reference identity). FROM-side relations is the
2526        // walked `relations[]` list already built above.
2527        String updateTargetAlias = effectiveAliasOf(targetTable);
2528        if (updateTargetAlias == null || updateTargetAlias.isEmpty()) {
2529            updateTargetAlias = targetQName;
2530        }
2531        List<OutputColumn> returningColumns = buildReturningColumns(
2532                update.getReturningClause(),
2533                update.getOutputClause(),
2534                "UPDATE",
2535                targetQName,
2536                updateTargetAlias,
2537                /*targetTable=*/ targetTable,
2538                relations,
2539                providerWithStar,
2540                updateIdx,
2541                lineage,
2542                update);
2543
2544        StatementGraph updateStmt = new StatementGraph(
2545                /*name=*/ null,
2546                "UPDATE",
2547                relations,
2548                outputs,
2549                returningColumns,
2550                filterRefs,
2551                /*joinColumnRefs=*/ joinRefs,
2552                /*groupByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
2553                /*havingColumnRefs=*/ Collections.<ColumnRef>emptyList(),
2554                /*orderByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
2555                /*distinctOnColumnRefs=*/ Collections.<ColumnRef>emptyList(),
2556                /*distinct=*/ false,
2557                /*setOperator=*/ null,
2558                /*rowLimit=*/ null,
2559                target);
2560
2561        // Slice 83 — updateIdx is dynamic: stmts already contains any
2562        // extracted FROM-subquery statements from step 5.5. The
2563        // slice-78/80 contract `target.col_i ← STATEMENT_OUTPUT(idx,
2564        // out_i)` is preserved by indexing the UPDATE's own statement
2565        // position rather than the slice-80 hardcoded 0.
2566        stmts.add(updateStmt);
2567
2568        // Slice 78/80 cross-stmt edges — one per SET assignment:
2569        //   target.col_i ← STATEMENT_OUTPUT(updateIdx, out_i)
2570        for (int i = 0; i < outputs.size(); i++) {
2571            String tgtName = targetColumnNames.get(i);
2572            String outName = outputs.get(i).getName();
2573            if (tgtName == null || tgtName.isEmpty()
2574                    || outName == null || outName.isEmpty()) {
2575                // Defensive — both should be the same verbatim spelling.
2576                continue;
2577            }
2578            lineage.add(new LineageEdge(
2579                    LineageRef.tableColumn(targetQName, tgtName),
2580                    LineageRef.statementOutput(updateIdx, outName)));
2581        }
2582
2583        // Slice 115 — for each SET assignment whose RHS was extracted as
2584        // a top-level scalar subquery in step 5.9, emit the cross-stmt
2585        // wire edge:
2586        //   STATEMENT_OUTPUT(updateIdx, outName) →
2587        //       STATEMENT_OUTPUT(scalarIdx, innerOutputName)
2588        // mirrors the SELECT-side slice-11 emission in
2589        // emitLineageForStatement (line ~7440). Runs AFTER the slice-78/
2590        // 80 target edge loop so the target edge is always emitted
2591        // first; the scalar-bound assignment's OutputColumn.sources is
2592        // empty by construction so the slice-83
2593        // emitUpdateSubquerySourceEdges call below is a no-op for these
2594        // outputs.
2595        // Slice 115/119 — one edge per extracted scalar per SET assignment:
2596        //   STATEMENT_OUTPUT(updateIdx, outName) → STATEMENT_OUTPUT(scalarIdx, innerOutputName)
2597        if (!setRhsScalarInfo.isEmpty()) {
2598            for (Map.Entry<Integer, List<ScalarInfo>> e : setRhsScalarInfo.entrySet()) {
2599                int ord = e.getKey();
2600                if (ord < 0 || ord >= outputs.size()) continue;
2601                String outName = outputs.get(ord).getName();
2602                if (outName == null || outName.isEmpty()) continue;
2603                for (ScalarInfo info : e.getValue()) {
2604                    lineage.add(new LineageEdge(
2605                            LineageRef.statementOutput(updateIdx, outName),
2606                            LineageRef.statementOutput(info.statementIndex,
2607                                    info.innerOutputName)));
2608                }
2609            }
2610        }
2611
2612        // Slice 83 — emit STATEMENT_OUTPUT(updateIdx, out_i) →
2613        // STATEMENT_OUTPUT(subIdx, col) edges for output sources that
2614        // bind to a SUBQUERY-kind relation in this UPDATE's
2615        // relations[]. Base-table FROM-side sources stay as
2616        // outputs[i].sources only — preserves the slice-82 contract
2617        // that joined UPDATE without subqueries emits exactly ONE
2618        // cross-stmt edge per SET assignment (the target edge above).
2619        // Slice 105 — combine the slice-83 subqueryAliasToIndex with
2620        // the slice-105 CTE-as-relation alias→cteIdx entries so a SET
2621        // RHS reference to a CTE column (which lives on a SUBQUERY-
2622        // kind relation per slice 105) still produces a cross-stmt
2623        // STATEMENT_OUTPUT edge to the CTE body. Without the merge the
2624        // visible OutputColumn.sources stays correct but lineage[]
2625        // silently drops the canonical edge (codex round-2 Q5).
2626        Map<String, Integer> combinedAliasToSubIdx =
2627                buildUpdateCombinedAliasToSubIdx(update,
2628                        subqueryAliasToIndex, cteNameToStatementIndex);
2629        if (!combinedAliasToSubIdx.isEmpty()) {
2630            emitUpdateSubquerySourceEdges(updateStmt, updateIdx,
2631                    combinedAliasToSubIdx, lineage);
2632        }
2633
2634        return new SemanticProgram(stmts, lineage);
2635    }
2636
2637    /**
2638     * Slice 83 — emit STATEMENT_OUTPUT → STATEMENT_OUTPUT edges from
2639     * each UPDATE output to its subquery-bound source column. Walks
2640     * {@code outputs[i].sources} and, for any source whose
2641     * {@code relationAlias} matches a SUBQUERY-kind entry in the
2642     * statement's {@link RelationSource} list, emits an edge to the
2643     * corresponding extracted subquery's STATEMENT_OUTPUT position.
2644     *
2645     * <p>Why not call {@link #emitLineageForStatement}? The SELECT-path
2646     * helper emits edges for ALL output sources (TABLE-kind →
2647     * TABLE_COLUMN; CTE/SUBQUERY-kind → STATEMENT_OUTPUT). For UPDATE
2648     * the slice-78/80 contract is intentionally narrower: the only
2649     * cross-stmt edge per SET assignment is the target edge. Adding
2650     * STATEMENT_OUTPUT → TABLE_COLUMN edges for base-table FROM-side
2651     * sources would change the cross-stmt edge count contract that
2652     * slice-82 tests assert ({@code edges.size() == numAssignments}).
2653     * The slice-83 emitter therefore is SUBQUERY-only — base-table
2654     * FROM-side refs continue to surface via {@code outputs[i].sources}
2655     * but emit no extra LineageEdge.
2656     */
2657    private static void emitUpdateSubquerySourceEdges(
2658            StatementGraph updateStmt,
2659            int updateIdx,
2660            Map<String, Integer> subqueryAliasToIndex,
2661            List<LineageEdge> lineage) {
2662        // Codex slice-83 diff-review Q1 BLOCKING — both the map and
2663        // the lookup must use the same casing policy so SQL like
2664        // `... FROM (SELECT ...) sub WHERE ... SUB.x = …` (resolver-2
2665        // may surface either case in `src.getRelationAlias()` depending
2666        // on dialect and quoting) still finds the SUBQUERY-kind entry.
2667        // The slice-83 inScope map and `subqueryAliasToIndex` are both
2668        // keyed lowercase; do the same here. (SELECT-side
2669        // `emitLineageForStatement` uses case-sensitive equality —
2670        // pre-existing limitation; a separate refactor.)
2671        Map<String, RelationSource> aliasToRelation = new HashMap<>();
2672        for (RelationSource rs : updateStmt.getRelations()) {
2673            String key = rs.getAlias();
2674            // Skip null / empty aliases — empty-string would produce a
2675            // vacuous "" key and could spuriously match other empty-alias
2676            // relations (codex round-2 Q2 advisory).
2677            if (key == null || key.isEmpty()) continue;
2678            aliasToRelation.put(key.toLowerCase(Locale.ROOT), rs);
2679        }
2680        for (OutputColumn out : updateStmt.getOutputColumns()) {
2681            String outName = out.getName();
2682            if (outName == null || outName.isEmpty()) continue;
2683            for (ColumnRef src : out.getSources()) {
2684                String srcAlias = src.getRelationAlias();
2685                if (srcAlias == null || srcAlias.isEmpty()) continue;
2686                RelationSource rel = aliasToRelation.get(
2687                        srcAlias.toLowerCase(Locale.ROOT));
2688                if (rel == null) continue;
2689                if (rel.getBinding() == null
2690                        || rel.getBinding().getKind() != RelationKind.SUBQUERY) {
2691                    continue;
2692                }
2693                Integer subIdx = subqueryAliasToIndex.get(
2694                        rel.getAlias().toLowerCase(Locale.ROOT));
2695                if (subIdx == null) continue;
2696                lineage.add(new LineageEdge(
2697                        LineageRef.statementOutput(updateIdx, outName),
2698                        LineageRef.statementOutput(subIdx, src.getColumnName())));
2699            }
2700        }
2701    }
2702
2703    /**
2704     * Slice 115 — walk the UPDATE's SET clause and extract each
2705     * assignment whose RHS is exactly a top-level
2706     * {@link EExpressionType#subquery_t} as its own
2707     * {@code <scalar_subquery_<idx>>} {@link StatementGraph} appended to
2708     * {@code stmts} BEFORE the UPDATE. Mirrors the SELECT-side
2709     * {@link #extractScalarSubqueriesAsStatementsInternal} slice-11
2710     * pipeline but iterates SET assignments instead of result columns.
2711     * Returns {@code assignmentOrdinal → ScalarInfo} so {@link #buildUpdate}
2712     * can wire the cross-stmt edge for each extracted body.
2713     *
2714     * <p>Scope rejects (mirroring slice 11):
2715     * <ul>
2716     *   <li>Multi-column inner SELECT —
2717     *       {@link DiagnosticCode#SCALAR_SUBQUERY_COLUMN_COUNT}.</li>
2718     *   <li>Inner projection has no alias and no column name —
2719     *       {@link DiagnosticCode#SCALAR_SUBQUERY_INNER_PROJECTION_UNNAMED}.</li>
2720     *   <li>Subqueries in scalar body's WHERE / JOIN ON / GROUP BY —
2721     *       slice-11 {@link #rejectSubqueriesInScalarBodyClauses}.</li>
2722     *   <li>FROM-subqueries inside scalar body —
2723     *       {@code allowFromSubqueries=false} (slice-15 invariant).</li>
2724     *   <li>Nested scalar projections inside scalar body —
2725     *       {@code allowScalarProjectionSubqueries=false} (set-op-branch
2726     *       precedent; slice 115 initial scope).</li>
2727     *   <li>Window functions in scalar body —
2728     *       {@code allowWindowProjection=false} (slice-11 precedent).</li>
2729     *   <li>Correlated scalar subqueries (inner refs to outer aliases) —
2730     *       {@link #promoteCorrelatedRefsToOuterReference} called with
2731     *       {@link EnclosingScope#empty()} throws
2732     *       {@link DiagnosticCode#SCALAR_SUBQUERY_UNKNOWN_RELATION_ALIAS}.
2733     *       Lifting UPDATE-side correlation is a follow-up slice
2734     *       (slice 14 SELECT analogue extended to UPDATE).</li>
2735     * </ul>
2736     *
2737     * <p>Snapshot/rollback wrapper around the loop body mirrors
2738     * {@link #extractScalarSubqueriesAsStatements} so a partial
2739     * extraction (e.g. second of two scalar SET RHS fails on shape
2740     * validation) truncates {@code stmts}/{@code lineage} back to the
2741     * pre-call boundary.
2742     *
2743     * <p>Assignments whose RHS is not a top-level {@code subquery_t} are
2744     * silently skipped here; they fall through to the per-assignment
2745     * loop's existing slice-80 / slice-115 mixed-expression reject path.
2746     */
2747    private static Map<Integer, List<ScalarInfo>> extractScalarSubqueriesFromUpdateSetRhs(
2748            TUpdateSqlStatement update,
2749            NameBindingProvider provider,
2750            List<StatementGraph> stmts,
2751            List<LineageEdge> lineage,
2752            Map<String, Integer> cteNameToStatementIndex,
2753            Map<String, Integer> subqueryAliasToIndex) {
2754        TResultColumnList sets = update.getResultColumnList();
2755        if (sets == null || sets.size() == 0) {
2756            return Collections.<Integer, List<ScalarInfo>>emptyMap();
2757        }
2758        // Fast pre-scan: any SET RHS that contains a subquery (top-level
2759        // subquery_t or nested inside a compound expression)? Avoids the
2760        // snapshot/rollback wrapper overhead when none are present.
2761        // Slice 115 handled top-level subquery_t only; slice 119 extends
2762        // to mixed-expression RHS (e.g. `SET col = (SELECT...) + 1`).
2763        // Tuple-LHS assignments (Oracle SET (a, b) = ...) are
2764        // intentionally skipped so the per-assignment loop's
2765        // UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED reject (slice 80
2766        // contract) wins.
2767        boolean anySubquery = false;
2768        for (int i = 0; i < sets.size(); i++) {
2769            TResultColumn rc = sets.getResultColumn(i);
2770            if (rc == null || rc.getExpr() == null) continue;
2771            TExpression assignment = rc.getExpr();
2772            if (assignment.getExpressionType() != EExpressionType.assignment_t) {
2773                continue;
2774            }
2775            TExpression lhs = assignment.getLeftOperand();
2776            if (lhs == null
2777                    || lhs.getExpressionType() == EExpressionType.list_t) {
2778                continue;
2779            }
2780            TExpression rhs = assignment.getRightOperand();
2781            if (rhs != null && containsAnySubqueryExpression(rhs)) {
2782                anySubquery = true;
2783                break;
2784            }
2785        }
2786        if (!anySubquery) {
2787            return Collections.<Integer, List<ScalarInfo>>emptyMap();
2788        }
2789        int stmtsSnapshot = stmts.size();
2790        int lineageSnapshot = lineage.size();
2791        try {
2792            return extractScalarSubqueriesFromUpdateSetRhsInternal(
2793                    update, provider, stmts, lineage,
2794                    cteNameToStatementIndex, subqueryAliasToIndex, sets);
2795        } catch (RuntimeException ex) {
2796            while (stmts.size() > stmtsSnapshot) stmts.remove(stmts.size() - 1);
2797            while (lineage.size() > lineageSnapshot) lineage.remove(lineage.size() - 1);
2798            throw ex;
2799        }
2800    }
2801
2802    /**
     * Internal body of {@link #extractScalarSubqueriesFromUpdateSetRhs};
     * that (private) wrapper supplies the snapshot/rollback around this
     * method. Do not call directly from non-wrapper sites.
2806     */
2807    private static Map<Integer, List<ScalarInfo>> extractScalarSubqueriesFromUpdateSetRhsInternal(
2808            TUpdateSqlStatement update,
2809            NameBindingProvider provider,
2810            List<StatementGraph> stmts,
2811            List<LineageEdge> lineage,
2812            Map<String, Integer> cteNameToStatementIndex,
2813            Map<String, Integer> subqueryAliasToIndex,
2814            TResultColumnList sets) {
2815        Map<Integer, List<ScalarInfo>> ordinalToInfo = new HashMap<>();
2816        for (int i = 0; i < sets.size(); i++) {
2817            TResultColumn rc = sets.getResultColumn(i);
2818            if (rc == null || rc.getExpr() == null) continue;
2819            TExpression assignment = rc.getExpr();
2820            if (assignment.getExpressionType() != EExpressionType.assignment_t) {
2821                continue;
2822            }
2823            TExpression lhs = assignment.getLeftOperand();
2824            // Skip tuple-LHS assignments — the per-assignment loop's
2825            // slice-80 UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED reject
2826            // should win for these (e.g. Oracle `SET (a, b) = (SELECT
2827            // c1, c2 FROM ...)`). Without this skip, the inner SELECT's
2828            // multi-column projection would surface as
2829            // SCALAR_SUBQUERY_COLUMN_COUNT here instead.
2830            if (lhs == null
2831                    || lhs.getExpressionType() == EExpressionType.list_t) {
2832                continue;
2833            }
2834            TExpression rhs = assignment.getRightOperand();
2835            if (rhs == null || !containsAnySubqueryExpression(rhs)) {
2836                continue; // no subquery in this RHS — handled by per-assignment loop
2837            }
2838            // "outer alias" used in diagnostic messages — the SET LHS
2839            // column spelling. Mirrors the slice-11 `outerAlias` role.
2840            String outerAlias = (lhs.getExpressionType() == EExpressionType.simple_object_name_t
2841                    && lhs.getObjectOperand() != null)
2842                    ? lhs.getObjectOperand().toString()
2843                    : ("SET assignment #" + (i + 1));
2844
2845            // Determine which subquery TExpression nodes to extract.
2846            // Slice 115 path: RHS is exactly a top-level subquery_t →
2847            //   single-element list.
2848            // Slice 119 path: RHS is a compound expression (arithmetic,
2849            //   CASE, function) containing one or more subquery_t nodes
2850            //   at any depth → list in traversal order.
2851            List<TExpression> subqExprs;
2852            if (rhs.getExpressionType() == EExpressionType.subquery_t) {
2853                subqExprs = Collections.singletonList(rhs);
2854            } else {
2855                subqExprs = collectNestedSubqueryExpressions(rhs);
2856            }
2857            if (subqExprs.isEmpty()) continue; // defensive (containsAnySubqueryExpression true but none found)
2858
2859            // Build the UPDATE-side enclosing scope once per assignment
2860            // (used by each per-scalar correlation promotion below).
2861            EnclosingScope innerEnclosing = buildUpdateEnclosingScope(update,
2862                    cteNameToStatementIndex, subqueryAliasToIndex,
2863                    /*parent=*/ null);
2864
2865            List<ScalarInfo> infos = new ArrayList<>();
2866            for (TExpression subqExpr : subqExprs) {
2867                TSelectSqlStatement inner = subqExpr.getSubQuery();
2868                if (inner == null) {
2869                    throw new SemanticIRBuildException(
2870                            Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_NO_INNER_SELECT,
2871                            "scalar subquery on UPDATE SET RHS for '" + outerAlias
2872                                    + "' has no inner SELECT", rc));
2873                }
2874                // Pre-recursion validation (matches slice 11 ordering):
2875                // inspect inner column count and naming before recursive
2876                // build so the diagnostic is scalar-specific.
2877                TResultColumnList innerRcl = inner.getResultColumnList();
2878                if (innerRcl == null || innerRcl.size() == 0) {
2879                    throw new SemanticIRBuildException(
2880                            Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_COLUMN_COUNT,
2881                            "scalar subquery on UPDATE SET RHS for '" + outerAlias
2882                                    + "' must project exactly one column, got 0", rc));
2883                }
2884                if (innerRcl.size() != 1) {
2885                    throw new SemanticIRBuildException(
2886                            Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_COLUMN_COUNT,
2887                            "scalar subquery on UPDATE SET RHS for '" + outerAlias
2888                                    + "' must project exactly one column, got "
2889                                    + innerRcl.size(), rc));
2890                }
2891                TResultColumn innerCol = innerRcl.getResultColumn(0);
2892                String innerAlias = innerCol.getColumnAlias();
2893                String innerColName = innerCol.getColumnNameOnly();
2894                boolean innerHasName =
2895                        (innerAlias != null && !innerAlias.isEmpty())
2896                                || (innerColName != null && !innerColName.isEmpty());
2897                if (!innerHasName && !isConstantExpression(innerCol.getExpr())) {
2898                    throw new SemanticIRBuildException(
2899                            Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_INNER_PROJECTION_UNNAMED,
2900                            "scalar subquery on UPDATE SET RHS for '" + outerAlias
2901                                    + "' inner projection has no alias and no column "
2902                                    + "name; add an explicit alias inside the subquery",
2903                            rc));
2904                }
2905                // Predicate-leak guard: scalar body's WHERE / JOIN ON /
2906                // GROUP BY must not contain subqueries.
2907                rejectSubqueriesInScalarBodyClauses(inner, outerAlias);
2908
2909                // Slice 117 / 119 — decorate provider with the inner
2910                // SELECT's local FROM aliases for tolerant outer-binding.
2911                Set<String> innerLocalAliases = precomputeInnerLocalAliases(inner);
2912                NameBindingProvider tolerantProvider = innerLocalAliases.isEmpty()
2913                        ? provider
2914                        : provider.withTolerantOuterBinding(innerLocalAliases);
2915
2916                String scalarName = SCALAR_BODY_PREFIX + stmts.size() + ">";
2917                StatementGraph innerStmt = buildSelectStatement(inner, tolerantProvider,
2918                        scalarName,
2919                        /*hasOuterCteListAlreadyProcessed=*/ false,
2920                        /*allowFromSubqueries=*/ false,
2921                        /*allowScalarProjectionSubqueries=*/ false,
2922                        /*allowWindowProjection=*/ false);
2923                innerStmt = promoteCorrelatedRefsToOuterReference(
2924                        innerStmt, outerAlias, innerEnclosing);
2925                int idx = stmts.size();
2926                stmts.add(innerStmt);
2927                String innerOutName = effectiveOutputName(innerCol);
2928                infos.add(new ScalarInfo(idx, innerOutName));
2929                emitLineageForStatement(innerStmt, idx, lineage,
2930                        cteNameToStatementIndex,
2931                        innerEnclosing.flattenSubqueryAliasToIndex(),
2932                        Collections.<Integer, ScalarInfo>emptyMap());
2933            }
2934            ordinalToInfo.put(i, infos);
2935        }
2936        return ordinalToInfo;
2937    }
2938
2939    /**
2940     * Slice 82 — process one FROM-side source table for joined
2941     * {@link #buildUpdate}. Applies the slice-82 reject contract for
2942     * non-table FROM sources, then appends a TABLE-kind
2943     * {@link RelationSource} unless the table is the target
2944     * (reference-identity filter — clean IR semantics: relations[]
2945     * models read-side sources only).
2946     */
2947    private static void buildUpdateRelation(TTable t, TTable targetTable,
2948                                            List<RelationSource> relations,
2949                                            TUpdateSqlStatement update,
2950                                            Map<String, Integer> cteNameToStatementIndex) {
2951        if (t == null) {
2952            return; // defensive — parser should never produce a null table
2953        }
2954        if (t.getTableType() == gudusoft.gsqlparser.ETableSource.subquery) {
2955            // Slice 83 — admit FROM-side subqueries. The inner SELECT
2956            // has already been extracted as its own StatementGraph by
2957            // {@link #extractUpdateFromSubqueries} (step 5.5 of
2958            // buildUpdate). Here we publish the SUBQUERY-kind
2959            // {@link RelationSource} so {@code outputs[i].sources}
2960            // resolved via the inScope-enhanced provider can route to
2961            // it. Alias and qualifiedName both use
2962            // {@code effectiveAliasOf(t)} — matching slice-14 / slice-58
2963            // SUBQUERY-kind convention used by SELECT.
2964            //
2965            // {@code UPDATE_FROM_SUBQUERY_NOT_SUPPORTED} stays declared
2966            // but unreached (slice-71/72 retain-for-documentation
2967            // precedent — keeps the public DiagnosticCode enum stable
2968            // for consumers that route by code).
2969            String subAlias = effectiveAliasOf(t);
2970            if (subAlias != null && !subAlias.isEmpty()) {
2971                relations.add(new RelationSource(subAlias,
2972                        new RelationBinding(RelationKind.SUBQUERY, subAlias)));
2973            }
2974            return;
2975        }
2976        if (t.getTableType() == gudusoft.gsqlparser.ETableSource.join) {
2977            // Defensive: TTable wrapping a TJoin. Not reached by any
2978            // observed parser path on the supported dialects (slice-82
2979            // probe set), but gets its own DiagnosticCode so consumers
2980            // can route this distinct shape without parsing message
2981            // text — per slice-80's message-text-discrimination
2982            // contract (codex round-1 Q4 BLOCKING).
2983            throw new SemanticIRBuildException(Diagnostic.error(
2984                    DiagnosticCode.UPDATE_FROM_NESTED_JOIN_NOT_SUPPORTED,
2985                    "UPDATE FROM source is a nested join wrapper; "
2986                            + "slice 82 admits simple table FROM sources only",
2987                    update));
2988        }
2989        // Reference-identity filter: target's own TTable instance is
2990        // excluded from relations[]. In MSSQL `UPDATE T2 ... FROM Table2 T2 …`,
2991        // tables[0] / joins[0].getTable() IS the same instance as
2992        // update.getTargetTable(); excluding it keeps the IR clean
2993        // (relations[] models reads, target models writes). The
2994        // catalog-miss WARN walker's target-first ordering handles the
2995        // cross-instance-same-name MSSQL self-join edge case where two
2996        // distinct TTable instances share the same qualified name.
2997        if (t == targetTable) {
2998            return;
2999        }
3000        TObjectName tName = t.getTableName();
3001        if (tName == null) {
3002            return; // defensive
3003        }
3004        // Slice 105 — FROM-side CTE detection. When the FROM-side table
3005        // is an objectname-typed reference whose bare name matches a
3006        // declared CTE in this UPDATE's outer WITH clause, emit a
3007        // SUBQUERY-kind RelationSource pointing at the CTE statement
3008        // (mirrors MERGE USING-as-CTE in slice 101). The slice-77
3009        // catalog-miss WARN walker filters to RelationKind.TABLE so
3010        // CTE-bound relations are naturally skipped, even when the
3011        // catalog also declares the same name (codex round-2 Q4
3012        // confirmed YES). The cross-stmt lineage edge from
3013        // STATEMENT_OUTPUT(updateIdx,col) → STATEMENT_OUTPUT(cteIdx,col)
3014        // is emitted by emitUpdateSubquerySourceEdges using the
3015        // combined alias→subIdx map.
3016        if (cteNameToStatementIndex != null
3017                && !cteNameToStatementIndex.isEmpty()) {
3018            String bareName = tName.toString();
3019            if (bareName != null && !bareName.isEmpty()) {
3020                String bareNameLower = bareName.toLowerCase(Locale.ROOT);
3021                if (cteNameToStatementIndex.containsKey(bareNameLower)) {
3022                    String cteAlias = effectiveAliasOf(t);
3023                    if (cteAlias == null || cteAlias.isEmpty()) {
3024                        cteAlias = bareName;
3025                    }
3026                    relations.add(new RelationSource(cteAlias,
3027                            new RelationBinding(RelationKind.SUBQUERY, cteAlias)));
3028                    return;
3029                }
3030            }
3031        }
3032        // effectiveAliasOf returns the SQL-written alias if present,
3033        // else the table name. RelationSource requires a non-empty
3034        // alias; this matches the slice-58/59 buildRelation contract.
3035        relations.add(new RelationSource(effectiveAliasOf(t),
3036                new RelationBinding(RelationKind.TABLE, tName.toString())));
3037    }
3038
3039    /**
3040     * Slice 82 (extended by slice 86) — process one {@link TJoinItem}
3041     * for joined {@link #buildUpdate}. Routes USING / NATURAL JoinItems
3042     * through the SELECT-side slice-64/65/66 shared helpers
3043     * ({@link #populateUsingJoinRefs} / {@link #emitMergedJoinRefs} /
3044     * {@link #naturalSharedKeys}) so the UPDATE join walker emits the
3045     * same {@code joinColumnRefs[]} shape as a SELECT body. ON / CROSS
3046     * JoinItems retain the slice-82 reject contract (subquery in ON,
3047     * window in ON) and ref-collection path.
3048     *
3049     * <p>Slice 86 signature extension: the join context
3050     * ({@code topJoin}, {@code items}, {@code itemIndex}) and the
3051     * per-top-level-TJoin {@link LeftOutputState} are required by the
3052     * shared helpers — the prior-relations chain for emit-refs and the
3053     * accumulated left row type for NATURAL inference.
3054     *
3055     * <p>USING / NATURAL shape conflicts (USING+ON, NATURAL+USING,
3056     * NATURAL+ON) reuse the existing slice-64/66 codes
3057     * ({@link DiagnosticCode#JOIN_WITH_BOTH_ON_AND_USING},
3058     * {@link DiagnosticCode#NATURAL_WITH_USING},
3059     * {@link DiagnosticCode#NATURAL_WITH_ON}) rather than introducing
3060     * UPDATE-specific codes — matching slice 86's "reuse SELECT-side
3061     * machinery verbatim" architecture.
3062     *
3063     * <p>Slice 82's lifted reject codes
3064     * ({@link DiagnosticCode#UPDATE_FROM_JOIN_USING_NOT_SUPPORTED} and
3065     * {@link DiagnosticCode#UPDATE_FROM_JOIN_NATURAL_NOT_SUPPORTED})
3066     * stay declared-but-unreached for API stability — slice 71/72/82
3067     * retain-for-documentation precedent.
3068     */
3069    private static void buildUpdateJoinItem(TJoin topJoin,
3070                                            TJoinItemList items,
3071                                            int itemIndex,
3072                                            TTable targetTable,
3073                                            NameBindingProvider provider,
3074                                            List<RelationSource> relations,
3075                                            java.util.LinkedHashSet<ColumnRef> joinRefs,
3076                                            LeftOutputState leftState,
3077                                            TUpdateSqlStatement update,
3078                                            Map<String, Integer> cteNameToStatementIndex) {
3079        if (items == null) return;
3080        TJoinItem item = items.getJoinItem(itemIndex);
3081        if (item == null) return;
3082
3083        TObjectNameList usingCols = item.getUsingColumns();
3084        boolean hasUsing = usingCols != null && usingCols.size() > 0;
3085        boolean isNatural = isNaturalJoinType(item.getJoinType());
3086        boolean hasOn = item.getOnCondition() != null;
3087
3088        // Slice 86 — USING/NATURAL admit paths. Shape conflicts use the
3089        // slice-64/66 SELECT-side codes verbatim; the UPDATE-specific
3090        // lifted codes (UPDATE_FROM_JOIN_USING_NOT_SUPPORTED /
3091        // UPDATE_FROM_JOIN_NATURAL_NOT_SUPPORTED) are no longer thrown
3092        // (declared-but-unreached for API stability).
3093        if (isNatural && hasUsing) {
3094            throw new SemanticIRBuildException(Diagnostic.error(
3095                    DiagnosticCode.NATURAL_WITH_USING,
3096                    "NATURAL JOIN must not carry a USING clause; choose "
3097                            + "either NATURAL or USING, not both", item));
3098        }
3099        if (isNatural && hasOn) {
3100            throw new SemanticIRBuildException(Diagnostic.error(
3101                    DiagnosticCode.NATURAL_WITH_ON,
3102                    "NATURAL JOIN must not carry an ON condition; rewrite "
3103                            + "as JOIN ... ON, or drop the NATURAL keyword", item));
3104        }
3105        if (hasUsing && hasOn) {
3106            throw new SemanticIRBuildException(Diagnostic.error(
3107                    DiagnosticCode.JOIN_WITH_BOTH_ON_AND_USING,
3108                    "JOIN cannot carry both ON and USING; choose one", item));
3109        }
3110
3111        if (hasUsing) {
3112            // Right-side table first: applies slice-82 source-shape
3113            // rejects + identity filter exactly as the ON path.
3114            buildUpdateRelation(item.getTable(), targetTable, relations, update,
3115                    cteNameToStatementIndex);
3116            // Slice 64 emit-refs: left-then-right per key, walking
3117            // priorRelations = topJoin.getTable() + items[0..itemIndex-1].
3118            List<ColumnRef> usingRefs = new ArrayList<>();
3119            populateUsingJoinRefs(topJoin, items, itemIndex, item.getTable(),
3120                    usingCols, provider, usingRefs);
3121            joinRefs.addAll(usingRefs);
3122            // Slice 66 LeftOutputState update: merge right's columns
3123            // into accumulated state so a subsequent NATURAL JoinItem
3124            // sees the row type (matches SELECT-side
3125            // {@code buildRelations}).
3126            List<String> usingKeyNames = new ArrayList<>(usingCols.size());
3127            for (int k = 0; k < usingCols.size(); k++) {
3128                TObjectName key = usingCols.getObjectName(k);
3129                if (key == null) continue;
3130                String keyName = key.getColumnNameOnly();
3131                if (keyName != null && !keyName.isEmpty()) {
3132                    usingKeyNames.add(keyName);
3133                }
3134            }
3135            mergeRightIntoLeftOutput(leftState, item.getTable(), provider,
3136                    usingKeyNames);
3137            return;
3138        }
3139
3140        if (isNatural) {
3141            // Right-side table first; identity filter excludes target.
3142            buildUpdateRelation(item.getTable(), targetTable, relations, update,
3143                    cteNameToStatementIndex);
3144            // Slice 66 catalog-required NATURAL inference. Reject with
3145            // NATURAL_CATALOG_REQUIRED (re-use SELECT-side code) when
3146            // either side lacks resolvable column metadata.
3147            NaturalKeyResult r = naturalSharedKeys(leftState, item.getTable(), provider);
3148            if (r.kind != NaturalKeyResult.Kind.SUCCESS) {
3149                throw new SemanticIRBuildException(Diagnostic.error(
3150                        DiagnosticCode.NATURAL_CATALOG_REQUIRED,
3151                        formatNaturalCatalogReject(r), item));
3152            }
3153            List<ColumnRef> naturalRefs = new ArrayList<>();
3154            emitMergedJoinRefs(JoinKind.NATURAL, r.keys, topJoin, items,
3155                    itemIndex, item.getTable(), provider, naturalRefs);
3156            joinRefs.addAll(naturalRefs);
3157            // Update LeftOutputState with the right's columns (merging
3158            // shared keys into existing slots, appending non-shared
3159            // columns as new entries).
3160            mergeRightIntoLeftOutput(leftState, item.getTable(), provider, r.keys);
3161            return;
3162        }
3163
3164        // ON / CROSS branch — slice-82 contract preserved.
3165        buildUpdateRelation(item.getTable(), targetTable, relations, update,
3166                    cteNameToStatementIndex);
3167        // Slice 86 — append right to LeftOutputState so subsequent
3168        // NATURAL JoinItems in the same top-level TJoin observe the
3169        // accumulated row type. CROSS / ON contribute non-merged
3170        // columns to state (matches SELECT-side appendRightToLeftOutput).
3171        appendRightToLeftOutput(leftState, item.getTable(), provider);
3172        TExpression onCond = item.getOnCondition();
3173        if (onCond == null) return; // CROSS JOIN: no ON.
3174        if (containsAnySubqueryExpression(onCond)) {
3175            throw new SemanticIRBuildException(Diagnostic.error(
3176                    DiagnosticCode.UPDATE_JOIN_ON_HAS_SUBQUERY_NOT_SUPPORTED,
3177                    "UPDATE FROM JOIN ON condition contains a subquery; "
3178                            + "slice 82 admits scalar predicates only",
3179                    item));
3180        }
3181        rejectWindowFunctionInScope(onCond, "UPDATE FROM JOIN ON");
3182        joinRefs.addAll(collectColumnRefs(onCond, provider));
3183    }
3184
3185    /**
3186     * Slice 83 — extract every FROM-side subquery in
3187     * {@code update.getJoins()} as its own {@link StatementGraph}
3188     * appended to {@code stmts} before the UPDATE itself. Walks both
3189     * the driver TTable of each TJoin AND each JoinItem's right table.
3190     * Returns an alias → stmts-index map so the consuming UPDATE can
3191     * (a) build its in-scope column map via
3192     * {@link #buildUpdateInScopeMap}, and (b) emit
3193     * STATEMENT_OUTPUT → STATEMENT_OUTPUT edges via
3194     * {@link #emitUpdateSubquerySourceEdges}.
3195     *
3196     * <p>Reuses the SELECT-side {@link #processDirectSubqueryTable}
3197     * verbatim, passing empty CTE maps because slice 80 already
3198     * rejects top-level WITH on UPDATE
3199     * ({@link DiagnosticCode#UPDATE_CTE_NOT_SUPPORTED}). The inner
3200     * SELECT's own FROM-subqueries are handled recursively by the
3201     * helper. Inner predicate subqueries in WHERE / JOIN ON /
3202     * GROUP BY are caught by the slice-17 leak guard
3203     * ({@link #rejectSubqueriesInFromSubqueryBodyClauses}). Inner
3204     * top-level WITH is rejected by
3205     * {@code buildSelectStatement(hasOuterCteListAlreadyProcessed=false)}.
3206     * Inner scalar projection subqueries are rejected by
3207     * {@code buildSelectStatement(allowScalarProjectionSubqueries=false)}.
3208     *
3209     * <p>No mutation-guard wrapper here: buildUpdate owns fresh local
3210     * lists and exceptions propagate to the caller (codex round-1 Q5
3211     * NICE).
3212     */
3213    private static Map<String, Integer> extractUpdateFromSubqueries(
3214            TUpdateSqlStatement update,
3215            NameBindingProvider provider,
3216            List<StatementGraph> stmts,
3217            List<LineageEdge> lineage,
3218            Map<String, Integer> cteNameToStatementIndex,
3219            Map<String, List<String>> ctePublishedColumns) {
3220        Map<String, Integer> aliasToIndex = new HashMap<>();
3221        TJoinList joins = update.getJoins();
3222        if (joins == null) return aliasToIndex;
3223        // Slice 105 — forward the outer-WITH CTE maps so a nested SELECT
3224        // inside an extracted FROM-subquery body can resolve outer-WITH
3225        // CTE references. Resolver2 wires CTEScope already; the maps are
3226        // forwarded for parity with the SELECT / MERGE call sites.
3227        Map<String, Integer> cteMap = cteNameToStatementIndex == null
3228                ? Collections.<String, Integer>emptyMap()
3229                : cteNameToStatementIndex;
3230        Map<String, List<String>> ctePublished = ctePublishedColumns == null
3231                ? Collections.<String, List<String>>emptyMap()
3232                : ctePublishedColumns;
3233        for (TJoin join : joins) {
3234            // Driver table — may be a subquery (PG / Snowflake / BQ /
3235            // Redshift `UPDATE t SET … FROM (SELECT …) sub` shape).
3236            processDirectSubqueryTable(join.getTable(), provider,
3237                    stmts, lineage, cteMap, ctePublished, aliasToIndex);
3238            TJoinItemList items = join.getJoinItems();
3239            if (items == null) continue;
3240            for (int i = 0; i < items.size(); i++) {
3241                TJoinItem item = items.getJoinItem(i);
3242                if (item == null) continue;
3243                // Right-side table of a JoinItem — may be a subquery
3244                // (MSSQL / PG `UPDATE t SET … FROM x JOIN (SELECT …)
3245                // sub ON …` shape).
3246                processDirectSubqueryTable(item.getTable(), provider,
3247                        stmts, lineage, cteMap, ctePublished, aliasToIndex);
3248            }
3249        }
3250        return aliasToIndex;
3251    }
3252
3253    /**
3254     * Slice 83 — build an effective-alias-keyed in-scope map publishing
3255     * each extracted FROM-subquery's output column names. The consuming
3256     * UPDATE wraps its provider via
3257     * {@code provider.withInScopeRelationColumns(map)} so {@code sub.x}
3258     * resolves to the subquery's published column rather than failing
3259     * resolution against the catalog.
3260     *
3261     * <p>Base-table FROM-side relations do not need an entry: their
3262     * column resolution stays on the Resolver2 catalog path. Slice 60's
3263     * SELECT-side {@link #buildEffectiveAliasInScopeMap} also skips
3264     * base-table relations.
3265     *
3266     * <p>Slice 105 — when an outer WITH clause declares a CTE and a
3267     * FROM-side relation references that CTE by its bare name, publish
3268     * the CTE's column names against the FROM-side effective alias so
3269     * SET RHS / WHERE / ON refs against the CTE alias bind correctly.
3270     */
3271    private static Map<String, List<String>> buildUpdateInScopeMap(
3272            TUpdateSqlStatement update,
3273            Map<String, Integer> subqueryAliasToIndex,
3274            List<StatementGraph> stmts,
3275            Map<String, Integer> cteNameToStatementIndex,
3276            Map<String, List<String>> ctePublishedColumns) {
3277        Map<String, List<String>> result = new HashMap<>();
3278        boolean haveSubq = subqueryAliasToIndex != null
3279                && !subqueryAliasToIndex.isEmpty();
3280        boolean haveCte = cteNameToStatementIndex != null
3281                && !cteNameToStatementIndex.isEmpty();
3282        if (!haveSubq && !haveCte) {
3283            return result;
3284        }
3285        TJoinList joins = update.getJoins();
3286        if (joins == null) return result;
3287        for (TJoin join : joins) {
3288            addUpdateRelationToInScopeMap(join.getTable(),
3289                    subqueryAliasToIndex, stmts, result,
3290                    cteNameToStatementIndex, ctePublishedColumns);
3291            TJoinItemList items = join.getJoinItems();
3292            if (items == null) continue;
3293            for (int i = 0; i < items.size(); i++) {
3294                TJoinItem item = items.getJoinItem(i);
3295                if (item == null) continue;
3296                addUpdateRelationToInScopeMap(item.getTable(),
3297                        subqueryAliasToIndex, stmts, result,
3298                        cteNameToStatementIndex, ctePublishedColumns);
3299            }
3300        }
3301        return result;
3302    }
3303
3304    private static void addUpdateRelationToInScopeMap(TTable t,
3305            Map<String, Integer> subqueryAliasToIndex,
3306            List<StatementGraph> stmts,
3307            Map<String, List<String>> result,
3308            Map<String, Integer> cteNameToStatementIndex,
3309            Map<String, List<String>> ctePublishedColumns) {
3310        if (t == null) return;
3311        // Slice 105 — CTE-as-FROM-relation in-scope publication. When
3312        // the FROM-side table is an objectname-typed reference whose
3313        // bare name matches a declared outer CTE, publish the CTE's
3314        // own column names against the FROM-side effective alias so
3315        // SET RHS / WHERE refs against the CTE alias bind correctly.
3316        if (cteNameToStatementIndex != null
3317                && !cteNameToStatementIndex.isEmpty()
3318                && ctePublishedColumns != null
3319                && t.getTableType()
3320                        == gudusoft.gsqlparser.ETableSource.objectname) {
3321            TObjectName tName = t.getTableName();
3322            if (tName != null) {
3323                String bare = tName.toString();
3324                if (bare != null && !bare.isEmpty()) {
3325                    String bareLower = bare.toLowerCase(Locale.ROOT);
3326                    if (cteNameToStatementIndex.containsKey(bareLower)) {
3327                        String aliasKey = effectiveAliasLowerCaseOrNull(t);
3328                        if (aliasKey == null) aliasKey = bareLower;
3329                        List<String> cols = ctePublishedColumns.get(bareLower);
3330                        if (cols != null) {
3331                            result.put(aliasKey, cols);
3332                        }
3333                        return;
3334                    }
3335                }
3336            }
3337        }
3338        if (t.getTableType() != gudusoft.gsqlparser.ETableSource.subquery) {
3339            return;
3340        }
3341        if (subqueryAliasToIndex == null) {
3342            return;
3343        }
3344        String key = effectiveAliasLowerCaseOrNull(t);
3345        if (key == null) return;
3346        Integer idx = subqueryAliasToIndex.get(key);
3347        if (idx == null) return;
3348        result.put(key, outputColumnNames(stmts.get(idx)));
3349    }
3350
3351    /**
3352     * Slice 81 / slice 84 — admit single-target and joined
3353     * {@code DELETE} statements and produce a {@code "DELETE"}-kind
3354     * {@link StatementGraph} (§8.1.4 row D11 follow-up via slice 84's
3355     * joined-DELETE candidate (a)).
3356     *
3357     * <p>Structurally mirrors slice-80 + slice-82 + slice-83
3358     * {@link #buildUpdate} but with no SET clause and an empty
3359     * {@code outputColumns} list — DELETE has no projection of its
3360     * own (slice-85 RETURNING / OUTPUT projections travel in the
3361     * separate {@code returningColumns} slot). The target relation
3362     * is exposed via the slice-78 {@link TargetRelation} slot; its
3363     * {@code columns} list is intentionally empty because DELETE
3364     * removes whole rows rather than writing specific columns.
3365     *
3366     * <p>WHERE-side reads still surface on
3367     * {@link StatementGraph#getFilterColumnRefs()} so downstream
3368     * governance can audit "what predicates does this DELETE depend
3369     * on". The DELETE itself emits no cross-statement
3370     * {@link LineageEdge}s (the slice-78 / slice-80
3371     * {@code target.col_i ← STATEMENT_OUTPUT(…)} contract has no
3372     * DELETE analogue: there is no source projection).
3373     *
3374     * <p>Slice 84 admit scope (lifts slice-81's blanket joined-DELETE
3375     * reject for the common PG / MSSQL FROM-side shapes; mirrors
3376     * slice 82 + slice 83 onto DELETE):
3377     * <ul>
3378     *   <li>PG / Snowflake / BQ / Redshift {@code DELETE FROM t USING
3379     *       source_list [WHERE]} — {@code source_list} = simple table,
3380     *       comma-separated tables, or chain of explicit JOIN ... ON
3381     *       (driver is taken from {@code referenceJoins}).</li>
3382     *   <li>MSSQL {@code DELETE FROM t FROM driver_table [JOIN other
3383     *       ON ...] [WHERE]} — the target may itself appear in the
3384     *       FROM-FROM clause as a different TTable instance.</li>
3385     *   <li>MSSQL {@code DELETE alias FROM t alias INNER JOIN ... ON …}
3386     *       — the alias-form DELETE where target is matched by alias.</li>
3387     *   <li>CROSS JOIN inside USING — no ON; semantically equivalent
3388     *       to comma-FROM.</li>
3389     *   <li>{@code DELETE FROM t USING (SELECT …) s [WHERE]} —
3390     *       FROM-subquery as a USING source; mirrors slice-83 UPDATE
3391     *       FROM-subquery extraction.</li>
3392     * </ul>
3393     *
3394     * <p>Slice 84 reject scope (preserves slice-81 reject coverage
3395     * for shapes that still need a refinement slice):
3396     * <ul>
3397     *   <li>{@link DiagnosticCode#DELETE_JOINED_NOT_SUPPORTED} — any
3398     *       shape with {@code delete.joins.size() &gt; 0} except the
3399     *       slice-92 MySQL self-reference {@code DELETE T1 FROM T1}
3400     *       (now admitted): MySQL multi-target {@code DELETE T1, T2
3401     *       FROM …}, MySQL multi-USING {@code DELETE FROM T1 USING T1,
3402     *       T2}. Candidates (c) and (d) in §8.1.4 lift these later.</li>
3403     *   <li>{@link DiagnosticCode#DELETE_FROM_JOIN_USING_NOT_SUPPORTED}
3404     *       — {@code USING(col1, col2)} on a FROM-side join item;
3405     *       mirror of slice-82 {@code UPDATE_FROM_JOIN_USING_*}.</li>
3406     *   <li>{@link DiagnosticCode#DELETE_FROM_JOIN_NATURAL_NOT_SUPPORTED}
3407     *       — {@code NATURAL JOIN} on a FROM-side join item.</li>
3408     *   <li>{@link DiagnosticCode#DELETE_FROM_NESTED_JOIN_NOT_SUPPORTED}
3409     *       — defensive: TTable wrapping a TJoin in the FROM source
3410     *       (not reached by any observed parser path on supported
3411     *       dialects, but kept distinct from the subquery code per
3412     *       slice-80 message-text-discrimination contract).</li>
3413     *   <li>{@link DiagnosticCode#DELETE_JOIN_ON_HAS_SUBQUERY_NOT_SUPPORTED}
3414     *       — subquery in a JOIN ON predicate.</li>
3415     * </ul>
3416     *
3417     * <p>Other rejects raised directly by this method:
3418     * {@link DiagnosticCode#DELETE_TARGET_MISSING},
3419     * {@link DiagnosticCode#OUTPUT_INTO_NOT_SUPPORTED} and
3420     * {@link DiagnosticCode#DELETE_ORDER_BY_OR_LIMIT_NOT_SUPPORTED};
3421     * the slice-81 CTE / RETURNING / OUTPUT codes are not thrown here.
3422     * A null argument fails fast with {@link IllegalArgumentException}.
3423     *
3424     * <p>WHERE-side subqueries (slice 111) go through the
3425     * predicate-subquery extraction helpers under
3426     * {@code PredicateClauseContext.DELETE_WHERE}; anything left
3427     * unextracted is handed to the reject helper. Window functions in
3428     * WHERE / ON reuse {@link DiagnosticCode#CLAUSE_WINDOW_FUNCTION_LEAK}.
3429     *
3430     * <p>IR shape (slice 84 changes from slice 81):
3431     * <ul>
3432     *   <li>{@code relations[]} — now carries TABLE-kind
3433     *       {@link RelationSource}s for joined-DELETE FROM-side
3434     *       sources, plus SUBQUERY-kind sources for {@code USING
3435     *       (SELECT …)} extractions. Slice 81 left it empty.
3436     *       Reference-identity filter excludes the target's own
3437     *       TTable instance; the slice-82 walker-order swap (target
3438     *       before relations[] in
3439     *       {@link gudusoft.gsqlparser.ir.semantic.SqlSemanticAnalyzer#collectCatalogMissWarnings})
3440     *       handles same-qualified-name target+driver collisions
3441     *       (e.g. MSSQL {@code DELETE FROM t FROM t spqh JOIN sp}).</li>
3442     *   <li>{@code joinColumnRefs[]} — now carries ON-clause refs
3443     *       collected from each JoinItem under a per-DELETE
3444     *       {@link java.util.LinkedHashSet} for cross-JoinItem dedup
3445     *       (slice-82 codex round-1 Q2 BLOCKING precedent).</li>
3446     *   <li>The DELETE itself emits NO new cross-stmt
3447     *       {@link LineageEdge}s — empty {@code outputColumns[]}
3448     *       means there is no STATEMENT_OUTPUT(deleteIdx, …) anchor
3449     *       for slice-83's SUBQUERY-kind emitter. Extracted
3450     *       FROM-subqueries DO emit their own internal lineage edges
3451     *       via {@code emitLineageForStatement} inside
3452     *       {@link #processDirectSubqueryTable}.</li>
3453     * </ul>
3454     */
3455    public static SemanticProgram buildDelete(TDeleteSqlStatement delete,
3456                                              NameBindingProvider provider) {
3457        if (delete == null) {
3458            throw new IllegalArgumentException("delete must not be null");
3459        }
3460        if (provider == null) {
3461            throw new IllegalArgumentException("provider must not be null");
3462        }
3463
3464        // 1) Slice 106 — admit top-level WITH on DELETE. Walks the CTE
3465        // list left-to-right, building each body as a preceding
3466        // StatementGraph and producing cteNameToStatementIndex +
3467        // ctePublishedColumns for the FROM-as-CTE branch in
3468        // buildDeleteRelation below. Mirrors the slice-105 UPDATE
3469        // walker. `stmts` / `lineage` allocated here (hoisted from the
3470        // prior slice-84 location) so the CTE walker can append.
3471        // DELETE_CTE_NOT_SUPPORTED stays declared-but-unreached
3472        // (slice 71/72/82/86/95/96/97/98/99/100/101/102/103/104/105
3473        // precedent).
3474        List<StatementGraph> stmts = new ArrayList<>();
3475        List<LineageEdge> lineage = new ArrayList<>();
3476        Map<String, List<String>> ctePublishedColumns = new LinkedHashMap<>();
3477        Map<String, Integer> cteNameToStatementIndex = buildDeleteCteList(
3478                delete, provider, stmts, lineage, ctePublishedColumns);
3479
3480        // 2) Target table — defensive (parser usually rejects first).
3481        TTable targetTable = delete.getTargetTable();
3482        if (targetTable == null || targetTable.getTableName() == null) {
3483            throw new SemanticIRBuildException(Diagnostic.error(
3484                    DiagnosticCode.DELETE_TARGET_MISSING,
3485                    "DELETE statement has no resolvable target table",
3486                    delete));
3487        }
3488        String targetQName = targetTable.getTableName().toString();
3489        if (targetQName == null || targetQName.isEmpty()) {
3490            throw new SemanticIRBuildException(Diagnostic.error(
3491                    DiagnosticCode.DELETE_TARGET_MISSING,
3492                    "DELETE target table name is empty",
3493                    delete));
3494        }
3495
3496        // 3) Slice 84 / Slice 92 — joined-DELETE discriminator.
3497        // Parser-probe-verified shapes:
3498        //   - Admit (slice 84): PG `DELETE FROM t USING j` / MSSQL
3499        //     `DELETE FROM t FROM t spqh JOIN sp` / MSSQL `DELETE spqh
3500        //     FROM t spqh JOIN sp` / Snowflake DELETE-USING — all have
3501        //     joins.size=0 and referenceJoins.size > 0.
3502        //   - Admit (slice 92): MySQL `DELETE T1 FROM T1 [WHERE pred]`
3503        //     self-reference — joins.size=1, refJoins.size=1, and all
3504        //     three names (joins[0].table, refJoins[0].table, target)
3505        //     agree case-insensitively. Semantically identical to
3506        //     `DELETE FROM T1 [WHERE pred]`; produces the same IR shape.
3507        //   - Reject (slice-81 code preserved for non-self-ref):
3508        //     MySQL `DELETE T1, T2 FROM …` (joins.size=2) and
3509        //     MySQL `DELETE FROM T1 USING T1, T2` (refJoins.size=2).
3510        //
3511        // Slice 84 drops the slice-81 `tables.size > 1` and
3512        // `fromSourceJoin != null` blanket rejects (both fire for
3513        // admit shapes; probe confirms no parser-reachable shape
3514        // needs them when joins.size == 0). Candidate (d) in §8.1.4
3515        // (Hive multi-insert) remains open for a future slice.
3516        boolean mysqlSelfRef = false;
3517        if (delete.joins != null && delete.joins.size() > 0) {
3518            // Slice 92 — admit MySQL self-reference form:
3519            //   DELETE T1 FROM T1 [WHERE …]
3520            // The check requires all three names to match (codex
3521            // plan-review rounds Q1+Q5 BLOCKING fix: checking only
3522            // joins[0] is insufficient — DELETE T1 FROM T2 would
3523            // incorrectly admit because joins[0]=T1=target but
3524            // refJoins[0]=T2≠target).
3525            mysqlSelfRef = isMysqlSelfReferenceDelete(delete, targetQName);
3526            if (!mysqlSelfRef) {
3527                throw new SemanticIRBuildException(Diagnostic.error(
3528                        DiagnosticCode.DELETE_JOINED_NOT_SUPPORTED,
3529                        "DELETE with multi-target / multi-USING clause is "
3530                                + "not supported by SemanticIRBuilder.buildDelete; "
3531                                + "slice 84 admits PG `DELETE FROM t USING j` and "
3532                                + "MSSQL `DELETE FROM t FROM t JOIN s` shapes; "
3533                                + "slice 92 admits MySQL "
3534                                + "`DELETE T1 FROM T1 [WHERE …]` self-reference",
3535                        delete));
3536            }
3537        }
3538
3539        // 4) Slice 85 lifts the RETURNING / OUTPUT rejects on DELETE.
3540        // The cheap statement-level OUTPUT_INTO reject fires here so a
3541        // multi-violation shape routes to the cheaper structural code
3542        // first; the ORDER BY / LIMIT reject follows it. Codes
3543        // DELETE_RETURNING_CLAUSE_NOT_SUPPORTED / DELETE_OUTPUT_CLAUSE_NOT_SUPPORTED
3544        // stay declared but unreached (slice 71/72 precedent).
3545        if (delete.getOutputClause() != null
3546                && delete.getOutputClause().getIntoTable() != null) {
3547            throw new SemanticIRBuildException(Diagnostic.error(
3548                    DiagnosticCode.OUTPUT_INTO_NOT_SUPPORTED,
3549                    "DELETE OUTPUT ... INTO <target> writes a second target; "
3550                            + "slice 85 admits projection-only OUTPUT",
3551                    delete));
3552        }
3553        if (delete.getOrderByClause() != null
3554                || delete.getLimitClause() != null) {
3555            throw new SemanticIRBuildException(Diagnostic.error(
3556                    DiagnosticCode.DELETE_ORDER_BY_OR_LIMIT_NOT_SUPPORTED,
3557                    "DELETE with ORDER BY / LIMIT (MySQL) is not "
3558                            + "supported by SemanticIRBuilder.buildDelete; "
3559                            + "slice 81 admits no row-pruning on DELETE",
3560                    delete));
3561        }
3562
3563        // 4.7) Slice 84 — extract FROM-subqueries from referenceJoins
3564        // (after slice 106's CTE walker so the CTE bodies precede any
3565        // extracted FROM-subquery in the program). Mirrors slice-83
3566        // UPDATE FROM-subquery extraction (which uses
3567        // update.getJoins()); here we use delete.getReferenceJoins().
3568        // buildDelete owns fresh local stmts/lineage lists (allocated
3569        // in step 1 above) so exceptions propagate cleanly to the
3570        // caller — no snapshot/rollback wrapper.
3571        //
3572        // Slice 106 — forward cteNameToStatementIndex +
3573        // ctePublishedColumns so a nested SELECT inside an extracted
3574        // FROM-subquery body can resolve outer-WITH CTE references
3575        // (Resolver2 wires CTEScope; the maps are forwarded for parity
3576        // with the SELECT / MERGE / UPDATE call sites and so the
3577        // §N test for `USING (SELECT … FROM cte) sub` produces the
3578        // expected cross-stmt edge to the CTE body).
3579        //
3580        // Decorate the provider with the outer-WITH CTE name set so
3581        // the SELECT-side {@link #buildRelation} routes references to
3582        // those names through {@link RelationKind#CTE} (rather than
3583        // TABLE), which in turn makes
3584        // {@link #emitLineageForStatement} emit the cross-stmt
3585        // {@code STATEMENT_OUTPUT(subIdx,col) →
3586        // STATEMENT_OUTPUT(cteIdx,col)} edge required by §N. This
3587        // mirrors the SELECT-side outer-WITH walker
3588        // (see {@link #build}'s {@code outerProvider}).
3589        NameBindingProvider providerWithCte = cteNameToStatementIndex.isEmpty()
3590                ? provider
3591                : provider.withCteContext(cteNameToStatementIndex.keySet());
3592        Map<String, Integer> subqueryAliasToIndex =
3593                extractDeleteFromSubqueries(delete, providerWithCte, stmts, lineage,
3594                        cteNameToStatementIndex, ctePublishedColumns);
3595        Map<String, List<String>> deleteInScope = buildDeleteInScopeMap(
3596                delete, subqueryAliasToIndex, stmts,
3597                cteNameToStatementIndex, ctePublishedColumns);
3598        NameBindingProvider providerWithStar = deleteInScope.isEmpty()
3599                ? providerWithCte
3600                : providerWithCte.withInScopeRelationColumns(deleteInScope);
3601
3602        // 5) WHERE refs — slice 111 lifts the slice-81 blanket subquery
3603        // reject by routing uncorrelated predicate-subquery wrappers
3604        // (IN-SELECT / EXISTS / NOT EXISTS / scalar comparison /
3605        // ANY-ALL-SOME) through the slice-23+ JOIN-ON extraction pipeline
3606        // refactored by slice 110 to take a PredicateClauseContext. The
3607        // new DELETE_WHERE constant carries clause-specific
3608        // DiagnosticCode IDs (8 new DELETE_WHERE_* codes) and a
3609        // "DELETE WHERE clause" label. Each extracted wrapper lands as
3610        // its own <predicate_subquery_<i>> StatementGraph BEFORE the
3611        // DELETE (deleteIdx below = stmts.size() naturally accounts for
3612        // them — slice-83 dynamic-index pattern, slice 110 UPDATE
3613        // precedent). Remaining non-subquery refs flow into
3614        // filterColumnRefs via collectColumnRefsSkipping (or
3615        // collectColumnRefsTolerant on the slice-92 MySQL self-ref
3616        // path). Window functions in non-subquery subtrees still reject
3617        // via rejectWindowFunctionInScopeSkipping. Slice 84 —
3618        // providerWithStar so WHERE refs against extracted subquery
3619        // aliases bind correctly (slice-83 precedent).
3620        //
3621        // Slice 106 — providerWithCte (then providerWithStar on top of
3622        // it) already decorates the provider with the outer-WITH CTE
3623        // name set so the predicate body's inner SELECT routes
3624        // `FROM cte` refs through RelationKind.CTE and
3625        // emitLineageForStatement emits the
3626        // STATEMENT_OUTPUT(subIdx,col) → STATEMENT_OUTPUT(cteIdx,col)
3627        // cross-stmt edge (slice 110 UPDATE precedent).
3628        List<ColumnRef> filterRefs;
3629        TWhereClause where = delete.getWhereClause();
3630        if (where == null || where.getCondition() == null) {
3631            filterRefs = Collections.<ColumnRef>emptyList();
3632        } else {
3633            Set<TExpression> extractedWhereRoots =
3634                    Collections.<TExpression>emptySet();
3635            if (containsAnySubquery(where)) {
3636                extractedWhereRoots =
3637                        extractUncorrelatedPredicateSubqueriesFromClause(
3638                                where.getCondition(), providerWithStar,
3639                                stmts, lineage, cteNameToStatementIndex,
3640                                PredicateClauseContext.DELETE_WHERE);
3641                rejectAnyRemainingSubqueriesFromClause(
3642                        where.getCondition(), extractedWhereRoots,
3643                        PredicateClauseContext.DELETE_WHERE);
3644            }
3645            rejectWindowFunctionInScopeSkipping(where, "WHERE clause",
3646                    extractedWhereRoots);
3647            // Codex diff-review P1 fix: for MySQL self-reference DELETE the
3648            // MySQL parser puts 3 T1 instances in stmt.tables (target +
3649            // joins[0] + refJoins[0]), making Resolver2's inferredCandidates
3650            // see 3 candidates for any unqualified column → NOT_FOUND →
3651            // COLUMN_BINDING_NON_EXACT. Use a tolerant collector for the
3652            // self-ref path: EXACT_MATCH bindings (qualified refs) are
3653            // preserved verbatim; non-exact bindings emit the column ref with
3654            // the SQL-written qualifier (null for unqualified refs) instead of
3655            // throwing. Qualified refs like WHERE T1.id = 1 still get full
3656            // EXACT_MATCH treatment; only WHERE id = 1 (no qualifier) falls
3657            // back to the tolerant path. Slice 111 — both helpers now skip
3658            // extracted predicate-subquery subtrees so inner refs do not
3659            // leak into outer filterColumnRefs.
3660            filterRefs = mysqlSelfRef
3661                    ? collectColumnRefsTolerant(where, providerWithStar,
3662                            targetQName, extractedWhereRoots)
3663                    : collectColumnRefsSkipping(where, providerWithStar,
3664                            extractedWhereRoots);
3665        }
3666
3667        // 5.5) Slice 84 — walk delete.getReferenceJoins() to populate
3668        // relations[] (TABLE-kind FROM-side sources, target excluded
3669        // by reference identity; SUBQUERY-kind for USING (SELECT …))
3670        // and joinColumnRefs[] (ON-clause refs across all JoinItems).
3671        // Mirrors slice-82 buildUpdate's FROM walk, with the
3672        // `update.getJoins()` source replaced by
3673        // `delete.getReferenceJoins()`.
3674        //
3675        // Slice 92 — for MySQL self-reference DELETE T1 FROM T1,
3676        // refJoins[0] is the same table as the target; skip the loop
3677        // so relations[] stays empty (mirrors the slice-81 single-target
3678        // contract). Resolver2's ScopeBuilder has already registered
3679        // the FROM-clause table (including any alias) via the
3680        // `referenceJoins` walk in preVisit(TDeleteSqlStatement), so
3681        // WHERE refs resolve correctly even without a relations[] entry.
3682        List<RelationSource> relations = new ArrayList<>();
3683        // Slice-82 codex round-1 Q2 BLOCKING precedent — joinRefs
3684        // accumulates across multiple JoinItems in chained-JOIN
3685        // shapes. LinkedHashSet ensures cross-JoinItem dedup.
3686        java.util.LinkedHashSet<ColumnRef> joinRefsSet =
3687                new java.util.LinkedHashSet<>();
3688        TJoinList refJoins = delete.getReferenceJoins();
3689        if (!mysqlSelfRef && refJoins != null) {
3690            for (int ji = 0; ji < refJoins.size(); ji++) {
3691                TJoin join = refJoins.getJoin(ji);
3692                TTable leftTable = join.getTable();
3693                // Slice 106 — threads cteNameToStatementIndex so the
3694                // FROM-driver buildDeleteRelation call can route
3695                // objectname-typed CTE references to a SUBQUERY-kind
3696                // RelationSource pointing at the CTE statement.
3697                buildDeleteRelation(leftTable, targetTable, relations, delete,
3698                        cteNameToStatementIndex);
3699                TJoinItemList items = join.getJoinItems();
3700                if (items == null) continue;
3701                for (int i = 0; i < items.size(); i++) {
3702                    TJoinItem item = items.getJoinItem(i);
3703                    // Slice 106 — threads cteNameToStatementIndex through
3704                    // the JoinItem walker so JOIN-side CTE refs (MSSQL
3705                    // `FROM target t JOIN cte ON …`) get SUBQUERY-kind
3706                    // RelationSource emission.
3707                    buildDeleteJoinItem(item, targetTable, providerWithStar,
3708                            relations, joinRefsSet, delete,
3709                            cteNameToStatementIndex);
3710                }
3711            }
3712        }
3713        List<ColumnRef> joinRefs = new ArrayList<>(joinRefsSet);
3714
3715        // 6) Build the DELETE outer.
3716        // - relations[] may be non-empty for joined DELETE (slice 84);
3717        //   empty for single-target DELETE (slice 81 contract).
3718        // - target.columns empty by design — DELETE removes whole rows.
3719        RelationBinding targetBinding = new RelationBinding(
3720                RelationKind.TABLE, targetQName);
3721        TargetRelation target = new TargetRelation(
3722                targetBinding, Collections.<String>emptyList());
3723
3724        // Slice 85 — build RETURNING / OUTPUT projection columns BEFORE
3725        // the StatementGraph so the new returningColumns slot can be
3726        // populated. deleteIdx mirrors the slice-84 stmts.size() pattern.
3727        int deleteIdx = stmts.size();
3728        // DELETE target alias = effective alias from the target's
3729        // TTable (slice-84 convention).
3730        String deleteTargetAlias = effectiveAliasOf(targetTable);
3731        if (deleteTargetAlias == null || deleteTargetAlias.isEmpty()) {
3732            deleteTargetAlias = targetQName;
3733        }
3734        List<OutputColumn> returningColumns = buildReturningColumns(
3735                delete.getReturningClause(),
3736                delete.getOutputClause(),
3737                "DELETE",
3738                targetQName,
3739                deleteTargetAlias,
3740                /*targetTable=*/ targetTable,
3741                relations,
3742                providerWithStar,
3743                deleteIdx,
3744                lineage,
3745                delete);
3746
3747        StatementGraph deleteStmt = new StatementGraph(
3748                /*name=*/ null,
3749                "DELETE",
3750                relations,
3751                /*outputColumns=*/ Collections.<OutputColumn>emptyList(),
3752                returningColumns,
3753                filterRefs,
3754                joinRefs,
3755                /*groupByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
3756                /*havingColumnRefs=*/ Collections.<ColumnRef>emptyList(),
3757                /*orderByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
3758                /*distinctOnColumnRefs=*/ Collections.<ColumnRef>emptyList(),
3759                /*distinct=*/ false,
3760                /*setOperator=*/ null,
3761                /*rowLimit=*/ null,
3762                target);
3763
3764        stmts.add(deleteStmt);
3765        // Slice 85 — extracted FROM-subqueries have already emitted
3766        // their own internal lineage edges into `lineage` via
3767        // processDirectSubqueryTable; buildReturningColumns also
3768        // already appended STATEMENT_OUTPUT(deleteIdx, retName) →
3769        // TABLE_COLUMN(targetQName, baseCol) edges above. No further
3770        // edges are needed.
3771        return new SemanticProgram(stmts, lineage);
3772    }
3773
3774    /**
3775     * Slice 94 — admit the single-target MERGE skeleton:
3776     * <pre>
3777     *     MERGE INTO target [AS] tgt
3778     *     USING (source_table | (SELECT ...) ) [AS] src
3779     *     ON  &lt;join condition&gt;
3780     *     WHEN MATCHED      [AND &lt;cond&gt;] THEN UPDATE SET c1 = expr1 [, ...]
3781     *     WHEN NOT MATCHED  [AND &lt;cond&gt;] THEN INSERT [(c1, ...)] VALUES (expr1, ...)
3782     *     WHEN MATCHED      [AND &lt;cond&gt;] THEN DELETE
3783     * </pre>
3784     *
3785     * <p>Emits one {@code "MERGE"}-kind {@link StatementGraph} carrying:
3786     * <ul>
3787     *   <li>{@link TargetRelation} on {@code getTarget()} only — slice
3788     *       78/80 contract: target lives on the dedicated target slot,
3789     *       NOT in {@code relations[]}. The slice-77/79 catalog walker
3790     *       fires the kind-discriminated "MERGE target relation 'X'"
3791     *       message via {@code targetWarnMessage("MERGE")}.</li>
3792     *   <li>{@code relations[]} = one entry for the USING source
3793     *       (TABLE-kind base table or SUBQUERY-kind aliased subquery).
3794     *       The slice-77 FROM walker fires "FROM relation 'X'" for
3795     *       missing source.</li>
3796     *   <li>{@code outputColumns[]} = empty (MERGE has no projection).</li>
3797     *   <li>{@code joinColumnRefs[]} = ON condition refs + per-WHEN AND
3798     *       condition refs, LinkedHashSet-deduplicated (slice 82
3799     *       pattern).</li>
3800     *   <li>{@code filterColumnRefs[]} = per-WHEN action WHERE refs
3801     *       (UPDATE WHERE, UPDATE...DELETE WHERE, INSERT WHERE; slice
3802     *       95). Empty when no action WHERE is present.</li>
3803     * </ul>
3804     *
3805     * <p>Per-WHEN action lineage:
3806     * <ul>
3807     *   <li>{@code WHEN MATCHED THEN UPDATE SET col_i = expr_i}: emit
3808     *       one {@link LineageEdge} per (target col, RHS source ref)
3809     *       pair as {@code TABLE_COLUMN(target,col) ← <ref>} — direct,
3810     *       no STATEMENT_OUTPUT intermediate (MERGE has no SELECT
3811     *       projection). Codex round-2 Q4 confirmed YES.</li>
3812     *   <li>{@code WHEN NOT MATCHED THEN INSERT (c1, ...) VALUES (e1, ...)}:
3813     *       same pattern — one edge per (insert col, source ref).</li>
3814     *   <li>{@code WHEN MATCHED THEN DELETE}: no per-column lineage
3815     *       (slice 81 DELETE contract).</li>
3816     *   <li>{@code WHEN MATCHED [AND <cond>] THEN DO NOTHING} (PG 15+,
3817     *       slice 96): admitted as a no-op action. No per-column
3818     *       lineage (slice 81 DELETE precedent). Per-WHEN AND
3819     *       condition refs still feed {@code joinColumnRefs[]} via
3820     *       the pre-dispatch block.</li>
3821     *   <li>{@code WHEN NOT MATCHED BY SOURCE [AND <cond>] THEN
3822     *       UPDATE SET ... | DELETE} (SQL Server, slice 97):
3823     *       admitted with the SQL Server semantic invariant that
3824     *       SET RHS and per-WHEN AND cond may not reference USING
3825     *       source columns (no source row exists when the action
3826     *       fires). Source-side refs reject with
3827     *       {@link DiagnosticCode#MERGE_NOT_MATCHED_BY_SOURCE_REFERENCES_SOURCE}.
3828     *       INSERT on BY SOURCE is parser-admitted but semantically
3829     *       invalid; rejects with
3830     *       {@link DiagnosticCode#MERGE_NOT_MATCHED_BY_SOURCE_INSERT_NOT_VALID}.
3831     *       UPDATE target self-refs ({@code t.a = t.b}) emit no
3832     *       lineage edges (slice-94 alias-filter convention; codex
3833     *       round-1 Q2 confirmed). PG 17+ BY SOURCE syntax still
3834     *       parses as type 2 plain NOT MATCHED in parser 4.1.5.0
3835     *       — that parser gap is not addressed in slice 97.</li>
3836     * </ul>
3837     *
3838     * <p>For USING-subquery, the inner SELECT is built via {@link #build}
3839     * and appended as a preceding {@link StatementGraph}; its inner
3840     * lineage edges are rebased by the current statement-list offset so
3841     * STATEMENT_OUTPUT indices stay valid (slice 78 INSERT pattern).
3842     *
3843     * <p>Resolver2 already handles MERGE via {@link gudusoft.gsqlparser.resolver2.scope.MergeScope}
3844     * — both USING base tables and USING subqueries surface as
3845     * {@code sourceTable + EXACT_MATCH} bindings on RHS / VALUES /
3846     * ON / WHEN-AND refs. Codex round-2 Q5 BLOCKING fix: we install
3847     * an explicit slice-83-style published-column map only for
3848     * USING subqueries (deterministic; cheap; matches the SELECT-
3849     * side FROM-subquery pattern even when redundant).
3850     */
3851    public static SemanticProgram buildMerge(TMergeSqlStatement merge,
3852                                             NameBindingProvider provider) {
3853        if (merge == null) {
3854            throw new IllegalArgumentException("merge must not be null");
3855        }
3856        if (provider == null) {
3857            throw new IllegalArgumentException("provider must not be null");
3858        }
3859        // Slice 94 — defensive UsingScope reset at entry. MERGE does
3860        // not produce its own UsingScope but a parent context might
3861        // (e.g. nested-statement contexts); mirrors slice 80 / 86
3862        // buildUpdate hygiene.
3863        provider = provider.withUsingScope(UsingScope.EMPTY);
3864
3865        // Slice 101 — hoist allocations earlier so buildMergeCteList can
3866        // append CTE bodies as preceding statements. The slice-94 reject
3867        // at this location is replaced by the CTE walker below.
3868        List<StatementGraph> stmts = new ArrayList<>();
3869        List<LineageEdge> lineage = new ArrayList<>();
3870
3871        // 1) Slice 101 — admit top-level WITH on MERGE. Walks CTE list
3872        // left-to-right, building each body as a preceding statement.
3873        // Produces cteNameToStatementIndex + ctePublishedColumns for the
3874        // USING-as-CTE branch below. Mirrors SELECT-side build() at
3875        // lines 516-653. `MERGE_CTE_NOT_SUPPORTED` stays declared-but-
3876        // unreached for API stability (slice 71/72/82/86/95/96/97/98/99/100
3877        // precedent).
3878        Map<String, List<String>> ctePublishedColumns = new LinkedHashMap<>();
3879        Map<String, Integer> cteNameToStatementIndex = buildMergeCteList(
3880                merge, provider, stmts, lineage, ctePublishedColumns);
3881
3882        // 2) Target table — defensive.
3883        TTable targetTable = merge.getTargetTable();
3884        if (targetTable == null || targetTable.getTableName() == null) {
3885            throw new SemanticIRBuildException(Diagnostic.error(
3886                    DiagnosticCode.MERGE_TARGET_MISSING,
3887                    "MERGE statement has no resolvable target table",
3888                    merge));
3889        }
3890        String targetQName = targetTable.getTableName().toString();
3891        if (targetQName == null || targetQName.isEmpty()) {
3892            throw new SemanticIRBuildException(Diagnostic.error(
3893                    DiagnosticCode.MERGE_TARGET_MISSING,
3894                    "MERGE target table name is empty",
3895                    merge));
3896        }
3897
3898        // 3) USING source — defensive.
3899        TTable usingTable = merge.getUsingTable();
3900        if (usingTable == null) {
3901            throw new SemanticIRBuildException(Diagnostic.error(
3902                    DiagnosticCode.MERGE_USING_SOURCE_MISSING,
3903                    "MERGE statement has no USING source",
3904                    merge));
3905        }
3906
3907        // 4) ON condition — defensive (parser usually rejects first).
3908        TExpression onCondition = merge.getCondition();
3909        if (onCondition == null) {
3910            throw new SemanticIRBuildException(Diagnostic.error(
3911                    DiagnosticCode.MERGE_ON_CONDITION_MISSING,
3912                    "MERGE statement has no ON condition",
3913                    merge));
3914        }
3915
3916        // 5) OUTPUT INTO / RETURNING / LIMIT / error logging rejects.
3917        // Slice 98 lifts MSSQL MERGE OUTPUT projection (non-INTO) via
3918        // the slice-85 buildReturningColumns walker; the actual call
3919        // is deferred until after step 8 because the walker needs the
3920        // populated relations[] (USING source). OUTPUT INTO continues
3921        // to reject (writes a second target). The RETURNING-clause
3922        // branch stays declared-but-unreached: PG parser drops
3923        // MERGE RETURNING silently, Oracle PARSE_FAILED, Couchbase
3924        // has no test reach (slice 71/72/82/86/95/96/97 precedent).
3925        if (merge.getOutputClause() != null
3926                && merge.getOutputClause().getIntoTable() != null) {
3927            throw new SemanticIRBuildException(Diagnostic.error(
3928                    DiagnosticCode.OUTPUT_INTO_NOT_SUPPORTED,
3929                    "MERGE OUTPUT ... INTO <target> writes a second "
3930                            + "target; slice 98 admits OUTPUT projection only",
3931                    merge));
3932        }
3933        if (merge.getReturningClause() != null) {
3934            throw new SemanticIRBuildException(Diagnostic.error(
3935                    DiagnosticCode.MERGE_RETURNING_CLAUSE_NOT_SUPPORTED,
3936                    "MERGE RETURNING projection (Oracle / Couchbase) is "
3937                            + "not supported by SemanticIRBuilder.buildMerge",
3938                    merge));
3939        }
3940        if (merge.getLimitClause() != null) {
3941            throw new SemanticIRBuildException(Diagnostic.error(
3942                    DiagnosticCode.MERGE_LIMIT_NOT_SUPPORTED,
3943                    "MERGE with LIMIT (Couchbase) is not supported by "
3944                            + "SemanticIRBuilder.buildMerge",
3945                    merge));
3946        }
3947        if (merge.getErrorLoggingClause() != null) {
3948            throw new SemanticIRBuildException(Diagnostic.error(
3949                    DiagnosticCode.MERGE_ERROR_LOGGING_NOT_SUPPORTED,
3950                    "MERGE LOG ERRORS INTO (Oracle) is not supported by "
3951                            + "SemanticIRBuilder.buildMerge",
3952                    merge));
3953        }
3954
3955        // 6) Build USING source RelationSource. If USING is a subquery,
3956        // extract it as a preceding StatementGraph and emit a SUBQUERY-
3957        // kind RelationSource that points at it; slice-83 pattern.
3958        // Otherwise emit a TABLE-kind RelationSource.
3959        // Slice 101 — `stmts` / `lineage` were hoisted to the top of
3960        // buildMerge so the CTE walker can append its preceding CTE
3961        // body statements first. Do NOT re-declare them here.
3962        String usingAlias = effectiveAliasOf(usingTable);
3963        if (usingAlias == null || usingAlias.isEmpty()) {
3964            usingAlias = (usingTable.getName() == null
3965                    || usingTable.getName().toString().isEmpty())
3966                    ? "__merge_using__"
3967                    : usingTable.getName().toString();
3968        }
3969        boolean usingIsSubquery = usingTable.getTableType()
3970                == gudusoft.gsqlparser.ETableSource.subquery;
3971        List<RelationSource> relations = new ArrayList<>();
3972        Map<String, List<String>> mergeInScope = new LinkedHashMap<>();
3973        NameBindingProvider providerWithStar = provider;
3974        // Slice 94 — alias resolution maps for the per-WHEN action
3975        // lineage emitter. TABLE-kind sources map alias → qualifiedName;
3976        // SUBQUERY-kind sources map alias → statement index of the
3977        // extracted inner SELECT. A SEPARATE `targetAliases` set
3978        // identifies refs whose relationAlias is the target alias
3979        // (codex round-1 diff Q1 BLOCKING — without this separation,
3980        // a self-merge where USING happens to share the target's name
3981        // would mis-classify the source alias as the target alias).
3982        Map<String, String> aliasToTableQName = new HashMap<>();
3983        Map<String, Integer> aliasToSubIdx = new HashMap<>();
3984        Set<String> targetAliases = new HashSet<>();
3985        String targetAlias = effectiveAliasOf(targetTable);
3986        if (targetAlias != null && !targetAlias.isEmpty()) {
3987            targetAliases.add(targetAlias.toLowerCase(Locale.ROOT));
3988        }
3989        targetAliases.add(targetQName.toLowerCase(Locale.ROOT));
3990        if (usingIsSubquery) {
3991            TSelectSqlStatement usingSelect = usingTable.getSubquery();
3992            if (usingSelect == null) {
3993                throw new SemanticIRBuildException(Diagnostic.error(
3994                        DiagnosticCode.MERGE_SOURCE_NOT_SUPPORTED,
3995                        "MERGE USING declared as subquery but no inner "
3996                                + "SELECT statement was attached",
3997                        merge));
3998            }
3999            // Slice 110 — known parity gap with slice-110 UPDATE-side
4000            // and slice-106 DELETE-side fixes: when the outer MERGE has
4001            // a CTE (slice 101) and the USING subquery body references
4002            // it (`MERGE INTO t USING (SELECT ... FROM cte) s ON ...`),
4003            // the `provider` passed here lacks `withCteContext`. The
4004            // proper fix is non-trivial because `build()` is the public
4005            // entry and creates a fresh local `cteNameToStatementIndex`
4006            // for the inner SELECT — adding `withCteContext` here would
4007            // classify the inner `cte` ref as CTE-kind but
4008            // `emitLineageForStatement` would then fail because the
4009            // inner build's own `cteNameToStatementIndex` is empty.
4010            // Fixing this requires plumbing the outer's CTE name+index
4011            // map into the inner build, similar to slice-93's
4012            // `appendOneHiveInsert` / slice-108's
4013            // `buildSelectBodyAfterCteWalk`. Deferred to a follow-up
4014            // slice; the rare shape currently produces correct REF
4015            // classification (Resolver2's CTEScope binds correctly)
4016            // but may miss the cross-stmt STATEMENT_OUTPUT edge to the
4017            // CTE body. Codex round-2 Q4 NO BLOCKING; addressed by
4018            // explicit documentation here.
4019            SemanticProgram inner = build(usingSelect, provider);
4020            int offset = stmts.size();
4021            stmts.addAll(inner.getStatements());
4022            for (LineageEdge e : inner.getLineage()) {
4023                lineage.add(rebaseLineageEdge(e, offset));
4024            }
4025            int subIdx = stmts.size() - 1;
4026            // Codex round-2 Q5 BLOCKING fix: install slice-83-style
4027            // in-scope map for USING subquery columns, scoped only to
4028            // the USING alias (codex round-3 Q2: ensure scoped to
4029            // USING alias only, no override of target / base-table).
4030            StatementGraph usingOuter = stmts.get(subIdx);
4031            List<String> publishedCols = new ArrayList<>();
4032            for (OutputColumn oc : usingOuter.getOutputColumns()) {
4033                if (oc.getName() != null && !oc.getName().isEmpty()) {
4034                    publishedCols.add(oc.getName());
4035                }
4036            }
4037            mergeInScope.put(
4038                    usingAlias.toLowerCase(Locale.ROOT), publishedCols);
4039            providerWithStar = provider.withInScopeRelationColumns(
4040                    mergeInScope);
4041            relations.add(new RelationSource(usingAlias,
4042                    new RelationBinding(RelationKind.SUBQUERY, usingAlias)));
4043            aliasToSubIdx.put(
4044                    usingAlias.toLowerCase(Locale.ROOT), subIdx);
4045        } else {
4046            // Slice 101 — USING-as-CTE detection. When MERGE has a WITH
4047            // clause and the USING bare name matches a CTE declared in
4048            // that WITH clause, route to a SUBQUERY-kind RelationSource
4049            // pointing at the CTE's already-built statement index. This
4050            // ensures:
4051            //   (a) lineage edges flow to STATEMENT_OUTPUT(cteIdx, col),
4052            //       not the fictitious TABLE_COLUMN(cteName, col);
4053            //   (b) the slice-77 catalog-miss WARN walker (which walks
4054            //       only TABLE-kind RelationSources) skips the CTE name;
4055            //   (c) Resolver2-bound CTE refs (probe 2026-05-17: status
4056            //       EXACT_MATCH with sourceTable=<cteName>) flow through
4057            //       the same emitMergeLineageEdge dispatch.
4058            // Case-insensitive lookup matches SQL identifier semantics.
4059            String usingBareName = (usingTable.getName() == null)
4060                    ? ""
4061                    : usingTable.getName().toString();
4062            String usingBareNameLower =
4063                    usingBareName.toLowerCase(Locale.ROOT);
4064            Integer cteIdx = usingBareNameLower.isEmpty()
4065                    ? null
4066                    : cteNameToStatementIndex.get(usingBareNameLower);
4067            if (cteIdx != null) {
4068                // USING references a declared CTE.
4069                List<String> publishedCols = ctePublishedColumns.get(
4070                        usingBareNameLower);
4071                if (publishedCols == null) {
4072                    publishedCols = new ArrayList<>();
4073                }
4074                mergeInScope.put(
4075                        usingAlias.toLowerCase(Locale.ROOT),
4076                        publishedCols);
4077                providerWithStar = provider.withInScopeRelationColumns(
4078                        mergeInScope);
4079                relations.add(new RelationSource(usingAlias,
4080                        new RelationBinding(
4081                                RelationKind.SUBQUERY, usingAlias)));
4082                aliasToSubIdx.put(
4083                        usingAlias.toLowerCase(Locale.ROOT), cteIdx);
4084                // Also register the bare CTE name in case the SQL
4085                // omits the alias (e.g. `USING src ON ...` with no
4086                // trailing alias). Mirrors the TABLE-kind branch
4087                // (line below) which also registers the bare name.
4088                aliasToSubIdx.put(usingBareNameLower, cteIdx);
4089            } else {
4090                // TABLE-kind USING — use the source table's qualified name
4091                // as the binding's qualifiedName so the slice-77 catalog
4092                // walker can find it.
4093                String usingQName = (usingTable.getTableName() == null)
4094                        ? usingAlias
4095                        : usingTable.getTableName().toString();
4096                relations.add(new RelationSource(usingAlias,
4097                        new RelationBinding(RelationKind.TABLE, usingQName)));
4098                aliasToTableQName.put(
4099                        usingAlias.toLowerCase(Locale.ROOT), usingQName);
4100                // Also register the bare name in case the SQL omits the
4101                // alias (e.g. `USING managers ON ...` without `s`).
4102                aliasToTableQName.put(
4103                        usingQName.toLowerCase(Locale.ROOT), usingQName);
4104            }
4105        }
4106
4107        // 7) Walk ON condition + per-WHEN AND conditions to build
4108        // joinColumnRefs[] with LinkedHashSet dedup (slice 82 pattern).
4109        // Reject ON-side subqueries: not supported in this slice; users
4110        // can still use a USING subquery for complex source logic.
4111        if (containsAnySubqueryExpression(onCondition)) {
4112            throw new SemanticIRBuildException(Diagnostic.error(
4113                    DiagnosticCode.MERGE_WHEN_CONDITION_HAS_SUBQUERY_NOT_SUPPORTED,
4114                    "MERGE ON condition contains a subquery; slice 94 "
4115                            + "admits scalar-only ON conditions",
4116                    merge));
4117        }
4118        rejectWindowFunctionInScope(onCondition, "MERGE ON condition");
4119        LinkedHashSet<ColumnRef> joinRefsSet = new LinkedHashSet<>();
4120        joinRefsSet.addAll(collectColumnRefs(onCondition, providerWithStar));
4121        // Slice 95 — per-WHEN action WHERE refs (UPDATE WHERE,
4122        // UPDATE...DELETE WHERE, INSERT WHERE) accumulate here.
4123        // Slice 94 left these refs silently dropped; slice 95 routes
4124        // them through filterColumnRefs[] (slice-80 UPDATE WHERE
4125        // precedent) — distinct from joinColumnRefs[] which holds
4126        // ON + WHEN-AND match conditions.
4127        LinkedHashSet<ColumnRef> filterRefsSet = new LinkedHashSet<>();
4128
4129        // 8) Per-WHEN clause loop. Validate type, dispatch to action
4130        // builder, accumulate joinColumnRefs and lineage edges.
4131        TargetRelation targetRel = null;
4132        List<String> targetColumnNames = new ArrayList<>();
4133        // Stable-order map: target col spelling (lower-cased) →
4134        // verbatim spelling encountered first. Iterating WHEN clauses
4135        // in order naturally produces SET-LHS first, then INSERT
4136        // column-list, matching the plan v3 column ordering rule.
4137        Map<String, String> seenTargetCols = new LinkedHashMap<>();
4138        // LineageEdge dedup spans the whole MERGE on
4139        // (target column lower-case, source ref lower-case key).
4140        Set<String> emittedEdgeKeys = new HashSet<>();
4141
4142        if (merge.getWhenClauses() == null
4143                || merge.getWhenClauses().size() == 0) {
4144            throw new SemanticIRBuildException(Diagnostic.error(
4145                    DiagnosticCode.MERGE_WHEN_NO_ACTION,
4146                    "MERGE statement has no WHEN clauses",
4147                    merge));
4148        }
4149        // Slice 116 — providerWithCteForActionWhere decorates
4150        // providerWithStar with withCteContext so the predicate body's
4151        // inner SELECT's `FROM cte` refs route through
4152        // RelationKind.CTE (slice 110 documented this is required for
4153        // emitLineageForStatement to emit STATEMENT_OUTPUT →
4154        // STATEMENT_OUTPUT edges into the CTE body). Hoisted here once
4155        // (cteNameToStatementIndex is finalized by line 3856 well
4156        // before the per-WHEN loop; recomputing per-WHEN would be
4157        // wasteful — codex diff-review Q1 advisory). The decoration
4158        // is scoped to collectMergeActionWhere only; providerWithStar
4159        // elsewhere stays unchanged so the WHEN AND condition (line
4160        // ~4153) and per-action SET/INSERT walkers see the original
4161        // provider — they already have their own slice-94 subquery
4162        // rejects so no asymmetric resolution surfaces.
4163        final NameBindingProvider providerWithCteForActionWhere =
4164                cteNameToStatementIndex.isEmpty()
4165                        ? providerWithStar
4166                        : providerWithStar.withCteContext(
4167                                cteNameToStatementIndex.keySet());
4168        // Slice 118 — build the MERGE correlation scope once (target +
4169        // USING source + outer CTEs) and thread through every per-WHEN
4170        // action WHERE call so correlated predicate subqueries promote
4171        // outer-aliased refs into OUTER_REFERENCE relations instead of
4172        // rejecting them. Mirrors the slice-117 pattern (UPDATE
4173        // SET-RHS correlated scalars). The scope is null-safe — every
4174        // value type inside flows from already-computed buildMerge
4175        // state (targetTable / usingTable / aliasToSubIdx /
4176        // cteNameToStatementIndex).
4177        final EnclosingScope mergeCorrelationScope =
4178                buildMergeEnclosingScope(merge, cteNameToStatementIndex,
4179                        aliasToSubIdx);
4180        for (int wi = 0; wi < merge.getWhenClauses().size(); wi++) {
4181            TMergeWhenClause when = merge.getWhenClauses().getElement(wi);
4182            // Slice 97 — BY SOURCE variants (SQL Server admits parser
4183            // types 7 / 8) are now admitted. PG 17+ syntax still parses
4184            // as type 2 (parser gap; slice 97 does not address). The
4185            // legacy MERGE_WHEN_NOT_MATCHED_BY_SOURCE_NOT_SUPPORTED
4186            // code stays declared-but-unreached (slice 71/72/82/86/95/96
4187            // precedent).
4188            boolean isNotMatchedBySource =
4189                    when.getType() == TMergeWhenClause.not_matched_by_source
4190                    || when.getType()
4191                        == TMergeWhenClause.not_matched_by_source_with_condition;
4192            // Per-WHEN AND condition (matched_with_condition,
4193            // not_matched_with_condition, not_matched_by_target_with_condition,
4194            // not_matched_by_source_with_condition).
4195            TExpression whenCond = when.getCondition();
4196            if (whenCond != null) {
4197                if (containsAnySubqueryExpression(whenCond)) {
4198                    throw new SemanticIRBuildException(Diagnostic.error(
4199                            DiagnosticCode.MERGE_WHEN_CONDITION_HAS_SUBQUERY_NOT_SUPPORTED,
4200                            "MERGE WHEN AND condition contains a subquery; "
4201                                    + "slice 94 admits scalar-only WHEN "
4202                                    + "conditions",
4203                            merge));
4204                }
4205                rejectWindowFunctionInScope(whenCond, "MERGE WHEN AND condition");
4206                List<ColumnRef> condRefs =
4207                        collectColumnRefs(whenCond, providerWithStar);
4208                // Slice 97 — BY SOURCE branches forbid source-side
4209                // refs in the AND condition (no source row exists).
4210                if (isNotMatchedBySource) {
4211                    rejectSourceRefsForBySource(condRefs, aliasToTableQName,
4212                            aliasToSubIdx, "MERGE WHEN NOT MATCHED BY SOURCE "
4213                                    + "AND condition", merge);
4214                }
4215                joinRefsSet.addAll(condRefs);
4216            }
4217            // Dispatch to action. Slice 96 — DO NOTHING is a no-op
4218            // action (PG 15+): no SET/INSERT VALUES, no per-column
4219            // lineage (slice-81 DELETE precedent). Per-WHEN AND
4220            // condition refs were already collected into joinRefsSet
4221            // above. MERGE_DO_NOTHING_NOT_SUPPORTED stays declared-
4222            // but-unreached for API stability (slice 71/72/82/86/95
4223            // precedent).
4224            boolean isDoNothingAction = when.getDoNothingClause() != null;
4225            TMergeUpdateClause upd = when.getUpdateClause();
4226            TMergeInsertClause ins = when.getInsertClause();
4227            boolean isDeleteAction = when.getDeleteClause() != null;
4228            if (upd == null && ins == null && !isDeleteAction
4229                    && !isDoNothingAction) {
4230                throw new SemanticIRBuildException(Diagnostic.error(
4231                        DiagnosticCode.MERGE_WHEN_NO_ACTION,
4232                        "MERGE WHEN clause #" + (wi + 1)
4233                                + " has no UPDATE / INSERT / DELETE / "
4234                                + "DO NOTHING action",
4235                        merge));
4236            }
4237            // Slice 95 — collect per-WHEN action WHERE refs into
4238            // filterRefsSet. Slice 116 — uncorrelated predicate-subquery
4239            // wrappers in those WHEREs now extract through the slice-23+
4240            // pipeline via PredicateClauseContext.MERGE_WHEN_WHERE
4241            // (mirrors slice 110-114 lifts on UPDATE / DELETE / SELECT /
4242            // set-op branch / CTE-body WHEREs). Window functions still
4243            // reject via rejectWindowFunctionInScopeSkipping (slice 95
4244            // contract preserved). MERGE_UPDATE_DELETE_WHERE_NOT_SUPPORTED
4245            // remains declared-but-unreached (slice 71/72/82/86
4246            // precedent). The providerWithCteForActionWhere decoration
4247            // is hoisted ABOVE the per-WHEN loop (codex diff-review Q1
4248            // advisory) since cteNameToStatementIndex is finalized
4249            // before the loop.
4250            if (upd != null) {
4251                collectMergeActionWhere(upd.getUpdateWhereClause(),
4252                        "MERGE WHEN action UPDATE WHERE",
4253                        providerWithCteForActionWhere, filterRefsSet,
4254                        stmts, lineage, cteNameToStatementIndex, merge,
4255                        mergeCorrelationScope);
4256                collectMergeActionWhere(upd.getDeleteWhereClause(),
4257                        "MERGE WHEN action DELETE WHERE",
4258                        providerWithCteForActionWhere, filterRefsSet,
4259                        stmts, lineage, cteNameToStatementIndex, merge,
4260                        mergeCorrelationScope);
4261            }
4262            if (ins != null) {
4263                collectMergeActionWhere(ins.getInsertWhereClause(),
4264                        "MERGE WHEN action INSERT WHERE",
4265                        providerWithCteForActionWhere, filterRefsSet,
4266                        stmts, lineage, cteNameToStatementIndex, merge,
4267                        mergeCorrelationScope);
4268            }
4269            // Slice 97 — BY SOURCE forbids INSERT semantically. MSSQL
4270            // parser admits the shape, so Semantic IR rejects.
4271            if (isNotMatchedBySource && ins != null) {
4272                throw new SemanticIRBuildException(Diagnostic.error(
4273                        DiagnosticCode.MERGE_NOT_MATCHED_BY_SOURCE_INSERT_NOT_VALID,
4274                        "MERGE WHEN NOT MATCHED BY SOURCE THEN INSERT is "
4275                                + "not a valid SQL Server action; INSERT only "
4276                                + "applies when the source row has no target "
4277                                + "match. Slice 97 admits UPDATE / DELETE on "
4278                                + "BY SOURCE branches.",
4279                        merge));
4280            }
4281            if (upd != null) {
4282                // Slice 97 — pre-walk SET RHS refs for BY SOURCE
4283                // branches to reject source-side refs before lineage
4284                // emission (Q1 in plan-review: keep helper branch-
4285                // agnostic; double-collect cost is bounded since
4286                // BY SOURCE UPDATEs are small).
4287                if (isNotMatchedBySource) {
4288                    rejectBySourceSetRhsRefs(upd, providerWithStar,
4289                            aliasToTableQName, aliasToSubIdx, merge);
4290                }
4291                buildMergeUpdateAction(upd, targetQName, targetTable,
4292                        providerWithStar, seenTargetCols, lineage,
4293                        emittedEdgeKeys, aliasToTableQName,
4294                        aliasToSubIdx, targetAliases, merge);
4295            }
4296            if (ins != null) {
4297                buildMergeInsertAction(ins, targetQName, targetTable,
4298                        providerWithStar, seenTargetCols, lineage,
4299                        emittedEdgeKeys, aliasToTableQName,
4300                        aliasToSubIdx, targetAliases, merge);
4301            }
4302            // DELETE action: no per-column lineage (slice 81 contract).
4303        }
4304
4305        // 9) Build TargetRelation from accumulated target column spellings.
4306        for (String spelling : seenTargetCols.values()) {
4307            targetColumnNames.add(spelling);
4308        }
4309        targetRel = new TargetRelation(
4310                new RelationBinding(RelationKind.TABLE, targetQName),
4311                targetColumnNames);
4312
4313        // 9.5) Slice 98 — MSSQL MERGE OUTPUT projection. Reuses the
4314        // slice-85 buildReturningColumns walker with dmlKind="MERGE":
4315        // Pass 1.5's INSERT/DELETE pseudo-table mismatch check naturally
4316        // skips (MERGE is action-polymorphic — INSERTED and DELETED
4317        // may both legitimately appear). The walker handles $action via
4318        // a slice-98-specific short-circuit (derived OutputColumn, no
4319        // sources, no edges). mergeIdx = stmts.size() because the MERGE
4320        // StatementGraph is appended below; for USING-subquery shapes,
4321        // step 6 has already appended the extracted SELECT so the index
4322        // points at the upcoming MERGE position (slice-83 dynamic-index
4323        // pattern). Pass relations[] as fromSideRelations so unique
4324        // USING-alias matches resolve to the source qname (codex Q4).
4325        int mergeIdx = stmts.size();
4326        String mergeTargetAlias = effectiveAliasOf(targetTable);
4327        if (mergeTargetAlias == null || mergeTargetAlias.isEmpty()) {
4328            mergeTargetAlias = targetQName;
4329        }
4330        List<OutputColumn> returningCols = buildReturningColumns(
4331                /*ret=*/ null,
4332                /*out=*/ merge.getOutputClause(),
4333                "MERGE",
4334                targetQName,
4335                mergeTargetAlias,
4336                targetTable,
4337                /*fromSideRelations=*/ relations,
4338                providerWithStar,
4339                mergeIdx,
4340                lineage,
4341                merge);
4342
4343        // 10) Emit StatementGraph. joinColumnRefs[] = ON + WHEN-AND refs.
4344        // Slice 95: filterColumnRefs[] = per-WHEN action WHERE refs
4345        // (UPDATE WHERE, UPDATE...DELETE WHERE, INSERT WHERE).
4346        // Slice 98: returningColumns[] = MERGE OUTPUT projection.
4347        List<ColumnRef> joinRefs = new ArrayList<>(joinRefsSet);
4348        List<ColumnRef> filterRefs = new ArrayList<>(filterRefsSet);
4349        StatementGraph mergeStmt = new StatementGraph(
4350                /*name=*/ null,
4351                "MERGE",
4352                relations,
4353                /*outputColumns=*/ Collections.<OutputColumn>emptyList(),
4354                returningCols,
4355                filterRefs,
4356                joinRefs,
4357                /*groupByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
4358                /*havingColumnRefs=*/ Collections.<ColumnRef>emptyList(),
4359                /*orderByColumnRefs=*/ Collections.<ColumnRef>emptyList(),
4360                /*distinctOnColumnRefs=*/ Collections.<ColumnRef>emptyList(),
4361                /*distinct=*/ false,
4362                /*setOperator=*/ null,
4363                /*rowLimit=*/ null,
4364                targetRel);
4365        stmts.add(mergeStmt);
4366
4367        return new SemanticProgram(stmts, lineage);
4368    }
4369
4370    /**
4371     * Slice 95 — collect column refs from a per-WHEN action WHERE
4372     * predicate ({@code TMergeUpdateClause.updateWhereClause},
4373     * {@code TMergeUpdateClause.deleteWhereClause}, or
4374     * {@code TMergeInsertClause.insertWhereClause}) into the supplied
4375     * {@code filterRefsSet}.
4376     *
4377     * <p>Slice 116 — lifts the slice-95 blanket subquery reject by
4378     * routing uncorrelated predicate-subquery wrappers (IN-SELECT /
4379     * EXISTS / NOT EXISTS / scalar comparison / ANY-ALL-SOME) through
4380     * the slice-23+ JOIN-ON extraction pipeline refactored by slice
4381     * 110 to take a {@link PredicateClauseContext}. The
4382     * {@link PredicateClauseContext#MERGE_WHEN_WHERE} constant reuses
4383     * the {@code SELECT_WHERE_*} DiagnosticCode family (slice 113/114
4384     * precedent — a MERGE-action WHERE IS a SELECT WHERE in shape) so
4385     * the enum count stays at 279; only the {@code clauseLabel}
4386     * differs so diagnostic messages can identify the MERGE-action
4387     * host context.
4388     *
4389     * <p>Each extracted wrapper lands as its own
4390     * {@code <predicate_subquery_<i>>} StatementGraph BEFORE the
4391     * MERGE statement (so {@code mergeIdx = stmts.size()} below in
4392     * {@code buildMerge} already accounts for them — slice-83
4393     * dynamic-index pattern, slice 110/111 precedent). Remaining
4394     * non-subquery refs flow into {@code filterRefsSet} via
4395     * {@link #collectColumnRefsSkipping}. Window functions in
4396     * non-subquery subtrees still reject via
4397     * {@link #rejectWindowFunctionInScopeSkipping} (slice 95
4398     * window-function contract preserved).
4399     *
4400     * <p>The supplied {@code provider} must carry
4401     * {@code withCteContext(cteMap.keySet())} so the predicate body's
4402     * inner SELECT's {@code FROM cte} refs route through
4403     * {@link RelationKind#CTE} — without that decoration,
4404     * {@code emitLineageForStatement} would lose the
4405     * STATEMENT_OUTPUT &rarr; STATEMENT_OUTPUT edge to the CTE body
4406     * (slice 110 documented gap for UPDATE WHERE — same applies here).
4407     * {@code buildMerge} composes the decoration once before the
4408     * per-WHEN loop.
4409     *
4410     * <p>Null-safe: returns immediately when the WHERE expression is
4411     * absent (slice-94 default; most WHEN clauses have no action
4412     * WHERE).
4413     */
4414    private static void collectMergeActionWhere(TExpression expr,
4415                                                String label,
4416                                                NameBindingProvider provider,
4417                                                LinkedHashSet<ColumnRef> filterRefsSet,
4418                                                List<StatementGraph> stmts,
4419                                                List<LineageEdge> lineage,
4420                                                Map<String, Integer> cteMap,
4421                                                TMergeSqlStatement merge,
4422                                                EnclosingScope correlationScope) {
4423        if (expr == null) {
4424            return;
4425        }
4426        Set<TExpression> extractedRoots = Collections.<TExpression>emptySet();
4427        if (containsAnySubqueryExpression(expr)) {
4428            extractedRoots =
4429                    extractUncorrelatedPredicateSubqueriesFromClause(
4430                            expr, provider, stmts, lineage, cteMap,
4431                            PredicateClauseContext.MERGE_WHEN_WHERE,
4432                            correlationScope);
4433            rejectAnyRemainingSubqueriesFromClause(expr, extractedRoots,
4434                    PredicateClauseContext.MERGE_WHEN_WHERE);
4435        }
4436        rejectWindowFunctionInScopeSkipping(expr, label, extractedRoots);
4437        filterRefsSet.addAll(collectColumnRefsSkipping(expr, provider,
4438                extractedRoots));
4439    }
4440
4441    /**
4442     * Slice 97 — reject any source-aliased column ref in a
4443     * WHEN NOT MATCHED BY SOURCE branch. SQL Server forbids
4444     * source-side references in this branch because there is no
4445     * matching source row when the action fires.
4446     *
4447     * <p>A ref is "source-aliased" iff its
4448     * {@code relationAlias.toLowerCase(Locale.ROOT)} appears in
4449     * either alias map (TABLE-kind or SUBQUERY-kind USING source).
4450     * Refs whose alias is unknown to both maps are assumed to be
4451     * target-bound (slice-94 alias-filter convention) and are
4452     * left alone.
4453     *
4454     * <p>Walks all refs so a source ref nested inside an arbitrary
4455     * function call (e.g. {@code COALESCE(s.code, t.code)}) is
4456     * caught — {@code collectColumnRefs} descends through arbitrary
4457     * scalar expressions (codex round-1 Q3 confirmed YES).
4458     */
4459    private static void rejectSourceRefsForBySource(List<ColumnRef> refs,
4460                                                    Map<String, String> aliasToTableQName,
4461                                                    Map<String, Integer> aliasToSubIdx,
4462                                                    String label,
4463                                                    TMergeSqlStatement merge) {
4464        if (refs == null || refs.isEmpty()) {
4465            return;
4466        }
4467        for (ColumnRef r : refs) {
4468            String alias = r.getRelationAlias();
4469            if (alias == null || alias.isEmpty()) {
4470                continue;
4471            }
4472            String key = alias.toLowerCase(Locale.ROOT);
4473            if (aliasToTableQName.containsKey(key)
4474                    || aliasToSubIdx.containsKey(key)) {
4475                throw new SemanticIRBuildException(Diagnostic.error(
4476                        DiagnosticCode.MERGE_NOT_MATCHED_BY_SOURCE_REFERENCES_SOURCE,
4477                        label + " references USING source column '"
4478                                + r + "'; WHEN NOT MATCHED BY SOURCE "
4479                                + "branches must only reference target "
4480                                + "columns or constants.",
4481                        merge));
4482            }
4483        }
4484    }
4485
4486    /**
4487     * Slice 97 — pre-walk the SET RHS of every assignment in a BY SOURCE
4488     * UPDATE action and reject any source-side ref. Called before
4489     * {@link #buildMergeUpdateAction} so the existing slice-94 helper
4490     * remains BY-SOURCE-agnostic.
4491     *
4492     * <p>Skips assignments that aren't shaped as a simple
4493     * {@code assignment_t} expression; those defects are caught by
4494     * {@link #buildMergeUpdateAction} with
4495     * {@link DiagnosticCode#UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED}.
4496     */
4497    private static void rejectBySourceSetRhsRefs(TMergeUpdateClause upd,
4498                                                 NameBindingProvider provider,
4499                                                 Map<String, String> aliasToTableQName,
4500                                                 Map<String, Integer> aliasToSubIdx,
4501                                                 TMergeSqlStatement merge) {
4502        TResultColumnList sets = upd.getUpdateColumnList();
4503        if (sets == null || sets.size() == 0) {
4504            return;
4505        }
4506        for (int i = 0; i < sets.size(); i++) {
4507            TResultColumn rc = sets.getResultColumn(i);
4508            TExpression assignment = (rc == null) ? null : rc.getExpr();
4509            if (assignment == null
4510                    || assignment.getExpressionType() != EExpressionType.assignment_t) {
4511                continue;
4512            }
4513            TExpression rhs = assignment.getRightOperand();
4514            if (rhs == null) {
4515                continue;
4516            }
4517            // Subquery RHS would short-circuit later with
4518            // UPDATE_SET_HAS_SUBQUERY_NOT_SUPPORTED; ignore here.
4519            if (containsAnySubqueryExpression(rhs)) {
4520                continue;
4521            }
4522            List<ColumnRef> rhsRefs = collectColumnRefs(rhs, provider);
4523            rejectSourceRefsForBySource(rhsRefs, aliasToTableQName,
4524                    aliasToSubIdx,
4525                    "MERGE WHEN NOT MATCHED BY SOURCE UPDATE SET "
4526                            + "assignment #" + (i + 1) + " RHS",
4527                    merge);
4528        }
4529    }
4530
4531    /**
4532     * Slice 94 — process one WHEN MATCHED THEN UPDATE SET action.
4533     * Each {@code TResultColumn} carries an assignment_t TExpression
4534     * whose leftOperand is the SET LHS (target column reference) and
4535     * whose rightOperand is the value expression. We emit one
4536     * {@link LineageEdge} per (target col, RHS source ref) pair.
4537     */
4538    private static void buildMergeUpdateAction(TMergeUpdateClause upd,
4539                                               String targetQName,
4540                                               TTable targetTable,
4541                                               NameBindingProvider provider,
4542                                               Map<String, String> seenTargetCols,
4543                                               List<LineageEdge> lineage,
4544                                               Set<String> emittedEdgeKeys,
4545                                               Map<String, String> aliasToTableQName,
4546                                               Map<String, Integer> aliasToSubIdx,
4547                                               Set<String> targetAliases,
4548                                               TMergeSqlStatement merge) {
4549        TResultColumnList sets = upd.getUpdateColumnList();
4550        if (sets == null || sets.size() == 0) {
4551            return;
4552        }
4553        for (int i = 0; i < sets.size(); i++) {
4554            TResultColumn rc = sets.getResultColumn(i);
4555            TExpression assignment = (rc == null) ? null : rc.getExpr();
4556            if (assignment == null
4557                    || assignment.getExpressionType() != EExpressionType.assignment_t) {
4558                throw new SemanticIRBuildException(Diagnostic.error(
4559                        DiagnosticCode.UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED,
4560                        "MERGE WHEN MATCHED UPDATE SET assignment #" + (i + 1)
4561                                + " is not a simple column-value assignment_t",
4562                        merge));
4563            }
4564            TExpression lhs = assignment.getLeftOperand();
4565            TExpression rhs = assignment.getRightOperand();
4566            if (lhs == null || rhs == null) {
4567                throw new SemanticIRBuildException(Diagnostic.error(
4568                        DiagnosticCode.UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED,
4569                        "MERGE WHEN MATCHED UPDATE SET assignment #" + (i + 1)
4570                                + " is missing an operand",
4571                        merge));
4572            }
4573            if (lhs.getExpressionType() == EExpressionType.list_t) {
4574                throw new SemanticIRBuildException(Diagnostic.error(
4575                        DiagnosticCode.UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED,
4576                        "MERGE WHEN MATCHED UPDATE SET tuple assignment "
4577                                + "'(a, b) = ...' is not supported",
4578                        merge));
4579            }
4580            if (lhs.getExpressionType() != EExpressionType.simple_object_name_t) {
4581                throw new SemanticIRBuildException(Diagnostic.error(
4582                        DiagnosticCode.UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED,
4583                        "MERGE WHEN MATCHED UPDATE SET assignment #" + (i + 1)
4584                                + " LHS is expressionType=" + lhs.getExpressionType()
4585                                + "; slice 94 admits simple column references only",
4586                        merge));
4587            }
4588            TObjectName targetCol = lhs.getObjectOperand();
4589            if (targetCol == null) {
4590                throw new SemanticIRBuildException(Diagnostic.error(
4591                        DiagnosticCode.UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED,
4592                        "MERGE WHEN MATCHED UPDATE SET assignment #" + (i + 1)
4593                                + " LHS has no TObjectName operand",
4594                        merge));
4595            }
4596            // Codex round-1 diff Q3 NO fix: SET LHS qualifier must be
4597            // either the target alias or the target qualified name.
4598            // A foreign qualifier (e.g. `s.name` on a SET LHS pointing
4599            // at source `s`) silently treated as a target column would
4600            // produce a wrong target column spelling.
4601            String rawSpelling = targetCol.toString();
4602            String colSpelling = validateAndStripSetLhsQualifier(
4603                    rawSpelling, targetTable, targetQName, merge);
4604            // Subquery / window on RHS — reuse existing codes per
4605            // plan v3 §B (codex round-1 Q2 NO fix).
4606            if (containsAnySubqueryExpression(rhs)) {
4607                throw new SemanticIRBuildException(Diagnostic.error(
4608                        DiagnosticCode.UPDATE_SET_HAS_SUBQUERY_NOT_SUPPORTED,
4609                        "MERGE WHEN MATCHED UPDATE SET assignment #" + (i + 1)
4610                                + " right-hand side contains a subquery; "
4611                                + "slice 94 admits scalar-only RHS expressions",
4612                        merge));
4613            }
4614            rejectWindowFunctionInScope(rhs, "MERGE WHEN MATCHED UPDATE SET RHS");
4615
4616            String lowerKey = colSpelling.toLowerCase(Locale.ROOT);
4617            if (!seenTargetCols.containsKey(lowerKey)) {
4618                seenTargetCols.put(lowerKey, colSpelling);
4619            }
4620            // Per-WHEN action lineage: TABLE_COLUMN(target,col) ← <RHS ref>
4621            List<ColumnRef> rhsRefs = collectColumnRefs(rhs, provider);
4622            for (ColumnRef src : rhsRefs) {
4623                emitMergeLineageEdge(targetQName, colSpelling, src,
4624                        lineage, emittedEdgeKeys, aliasToTableQName,
4625                        aliasToSubIdx, targetAliases);
4626            }
4627        }
4628    }
4629
4630    /**
4631     * Slice 94 — process one WHEN NOT MATCHED THEN INSERT (cols) VALUES (exprs)
4632     * action. Emits one {@link LineageEdge} per (insert col, source ref)
4633     * pair, plus arity validation between the explicit column list and
4634     * VALUES list. If the column list is omitted, we cannot derive
4635     * target column names — the slice rejects defensively.
4636     */
4637    private static void buildMergeInsertAction(TMergeInsertClause ins,
4638                                               String targetQName,
4639                                               TTable targetTable,
4640                                               NameBindingProvider provider,
4641                                               Map<String, String> seenTargetCols,
4642                                               List<LineageEdge> lineage,
4643                                               Set<String> emittedEdgeKeys,
4644                                               Map<String, String> aliasToTableQName,
4645                                               Map<String, Integer> aliasToSubIdx,
4646                                               Set<String> targetAliases,
4647                                               TMergeSqlStatement merge) {
4648        TResultColumnList values = ins.getValuelist();
4649        gudusoft.gsqlparser.nodes.TObjectNameList colList = ins.getColumnList();
4650        if (values == null || values.size() == 0) {
4651            throw new SemanticIRBuildException(Diagnostic.error(
4652                    DiagnosticCode.MERGE_INSERT_DEFAULT_VALUES_NOT_SUPPORTED,
4653                    "MERGE WHEN NOT MATCHED INSERT has no VALUES list "
4654                            + "(DEFAULT VALUES / row-type forms not supported)",
4655                    merge));
4656        }
4657        // If an explicit column list is present, validate arity.
4658        if (colList != null && colList.size() > 0) {
4659            if (colList.size() != values.size()) {
4660                throw new SemanticIRBuildException(Diagnostic.error(
4661                        DiagnosticCode.INSERT_COLUMN_COUNT_MISMATCH,
4662                        "MERGE WHEN NOT MATCHED INSERT column list has "
4663                                + colList.size() + " column(s) but VALUES "
4664                                + "list has " + values.size(),
4665                        merge));
4666            }
4667        }
4668        for (int i = 0; i < values.size(); i++) {
4669            TResultColumn rc = values.getResultColumn(i);
4670            TExpression rhs = (rc == null) ? null : rc.getExpr();
4671            if (rhs == null) {
4672                throw new SemanticIRBuildException(Diagnostic.error(
4673                        DiagnosticCode.MERGE_INSERT_DEFAULT_VALUES_NOT_SUPPORTED,
4674                        "MERGE WHEN NOT MATCHED INSERT VALUES item #"
4675                                + (i + 1) + " has no expression",
4676                        merge));
4677            }
4678            if (containsAnySubqueryExpression(rhs)) {
4679                throw new SemanticIRBuildException(Diagnostic.error(
4680                        DiagnosticCode.MERGE_INSERT_VALUES_HAS_SUBQUERY_NOT_SUPPORTED,
4681                        "MERGE WHEN NOT MATCHED INSERT VALUES item #"
4682                                + (i + 1) + " contains a subquery; slice 94 "
4683                                + "admits scalar-only VALUES expressions",
4684                        merge));
4685            }
4686            rejectWindowFunctionInScope(rhs, "MERGE INSERT VALUES");
4687
4688            // Target column name. If the explicit column list is
4689            // omitted, slice 94 does not synthesize positional target
4690            // column names (the catalog is required to map by
4691            // position; the current builder does not consume catalog
4692            // ordering). We still emit lineage edges from a synth
4693            // "__merge_insert_pos_<i>__" target column so source refs
4694            // are observable; users with an explicit column list get
4695            // the verbatim spelling.
4696            String colSpelling;
4697            if (colList != null && colList.size() > 0) {
4698                TObjectName col = colList.getObjectName(i);
4699                // INSERT column list is conventionally bare (no
4700                // qualifier), but Oracle / SQL Server allow
4701                // target-qualified spellings; strip them and reject
4702                // foreign qualifiers (codex round-1 diff Q3 NO fix).
4703                colSpelling = (col == null) ? ("__merge_insert_pos_" + i + "__")
4704                        : validateAndStripSetLhsQualifier(col.toString(),
4705                                targetTable, targetQName, merge);
4706            } else {
4707                colSpelling = "__merge_insert_pos_" + i + "__";
4708            }
4709            String lowerKey = colSpelling.toLowerCase(Locale.ROOT);
4710            if (!seenTargetCols.containsKey(lowerKey)) {
4711                seenTargetCols.put(lowerKey, colSpelling);
4712            }
4713            List<ColumnRef> rhsRefs = collectColumnRefs(rhs, provider);
4714            for (ColumnRef src : rhsRefs) {
4715                emitMergeLineageEdge(targetQName, colSpelling, src,
4716                        lineage, emittedEdgeKeys, aliasToTableQName,
4717                        aliasToSubIdx, targetAliases);
4718            }
4719        }
4720    }
4721
4722    /**
4723     * Slice 94 — emit a single MERGE per-WHEN action lineage edge:
4724     * {@code TABLE_COLUMN(targetQName, colSpelling) ← <src ref>}.
4725     * Deduplicates on a lower-case key so the same (target column,
4726     * source ref) pair appearing in multiple WHEN clauses produces
4727     * one edge (codex round-2 Q4 confirmed YES on this dedup
4728     * strategy).
4729     */
4730    private static void emitMergeLineageEdge(String targetQName,
4731                                             String colSpelling,
4732                                             ColumnRef src,
4733                                             List<LineageEdge> lineage,
4734                                             Set<String> emittedEdgeKeys,
4735                                             Map<String, String> aliasToTableQName,
4736                                             Map<String, Integer> aliasToSubIdx,
4737                                             Set<String> targetAliases) {
4738        if (src == null || colSpelling == null || colSpelling.isEmpty()) {
4739            return;
4740        }
4741        String srcAlias = src.getRelationAlias();
4742        String srcCol = src.getColumnName();
4743        if (srcCol == null || srcCol.isEmpty()) {
4744            return;
4745        }
4746        if (srcAlias == null) srcAlias = "";
4747        String aliasKey = srcAlias.toLowerCase(Locale.ROOT);
4748        // Codex round-1 diff Q1 BLOCKING fix: skip only if the alias
4749        // is a target alias, NOT if the alias resolves to a same-named
4750        // table. Self-merge (USING with same name as target) must
4751        // distinguish target from USING by alias identity, not by
4752        // resolved table name.
4753        if (targetAliases.contains(aliasKey)) {
4754            return;
4755        }
4756        // Map alias → LineageRef (TABLE_COLUMN or STATEMENT_OUTPUT).
4757        LineageRef toRef;
4758        if (aliasToSubIdx.containsKey(aliasKey)) {
4759            toRef = LineageRef.statementOutput(
4760                    aliasToSubIdx.get(aliasKey), srcCol);
4761        } else if (aliasToTableQName.containsKey(aliasKey)) {
4762            toRef = LineageRef.tableColumn(
4763                    aliasToTableQName.get(aliasKey), srcCol);
4764        } else {
4765            // Unknown alias — skip to avoid emitting a bogus edge.
4766            // The ref still surfaces on joinColumnRefs[] (ON / WHEN-AND).
4767            return;
4768        }
4769        // Codex round-1 diff Q2 NO fix: dedup on resolved LineageRef
4770        // identity (not raw alias), so `s.name` (alias) and
4771        // `managers.name` (qualified name) coming from the SAME
4772        // resolved source produce ONE edge. The key embeds the toRef's
4773        // canonical form: STATEMENT_OUTPUT(idx,col) or
4774        // TABLE_COLUMN(qname,col) — both are lower-cased.
4775        String resolvedKey;
4776        if (toRef.getKind() == LineageRef.Kind.STATEMENT_OUTPUT) {
4777            resolvedKey = "STMT_OUT::" + toRef.getStatementIndex() + "::"
4778                    + (toRef.getOutputName() == null ? "" : toRef.getOutputName());
4779        } else {
4780            resolvedKey = "TBL_COL::"
4781                    + (toRef.getQualifiedName() == null ? "" : toRef.getQualifiedName())
4782                    + "::"
4783                    + (toRef.getColumnName() == null ? "" : toRef.getColumnName());
4784        }
4785        String key = (targetQName + "::" + colSpelling + "::"
4786                + resolvedKey).toLowerCase(Locale.ROOT);
4787        if (emittedEdgeKeys.add(key)) {
4788            lineage.add(new LineageEdge(
4789                    LineageRef.tableColumn(targetQName, colSpelling),
4790                    toRef));
4791        }
4792    }
4793
4794    /**
4795     * Slice 94 — validate the SET LHS / INSERT column-list spelling
4796     * and strip a leading target qualifier. Codex round-1 diff Q3 NO
4797     * fix: previously the helper returned a foreign-qualified spelling
4798     * unchanged, which silently produced a wrong target column (e.g.
4799     * {@code "s.name"} would land in the target columns list verbatim
4800     * instead of being rejected).
4801     *
4802     * <p>Admit rules:
4803     * <ul>
4804     *   <li>Unqualified bare name: return unchanged.</li>
4805     *   <li>Qualified by target alias or qualified name: strip.</li>
4806     *   <li>Qualified by anything else: reject as
4807     *       {@link DiagnosticCode#UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED}
4808     *       (the same code used for slice-80 UPDATE-LHS shape rejects,
4809     *       message text discriminates by mentioning the foreign
4810     *       qualifier).</li>
4811     * </ul>
4812     */
4813    private static String validateAndStripSetLhsQualifier(String spelling,
4814                                                          TTable targetTable,
4815                                                          String targetQName,
4816                                                          TMergeSqlStatement merge) {
4817        if (spelling == null) return spelling;
4818        int dot = spelling.indexOf('.');
4819        if (dot <= 0) return spelling;
4820        String qualifier = spelling.substring(0, dot);
4821        String bare = spelling.substring(dot + 1);
4822        String targetAlias = effectiveAliasOf(targetTable);
4823        if (targetAlias != null
4824                && qualifier.equalsIgnoreCase(targetAlias)) {
4825            return bare;
4826        }
4827        if (qualifier.equalsIgnoreCase(targetQName)) {
4828            return bare;
4829        }
4830        throw new SemanticIRBuildException(Diagnostic.error(
4831                DiagnosticCode.UPDATE_TUPLE_ASSIGNMENT_NOT_SUPPORTED,
4832                "MERGE SET LHS / INSERT column qualifier '" + qualifier
4833                        + "' does not match the target table; slice 94 "
4834                        + "admits target-qualified or unqualified target "
4835                        + "column references only",
4836                merge));
4837    }
4838
4839    /**
4840     * Slice 84 — process one FROM-side source table for joined
4841     * {@link #buildDelete}. Mirrors slice-82 {@link #buildUpdateRelation}.
4842     * Applies the slice-84 reject contract for nested-join wrappers,
4843     * then appends a TABLE-kind {@link RelationSource} unless the
4844     * table is the target (reference-identity filter — clean IR
4845     * semantics: relations[] models read-side sources only). For
4846     * subquery sources (already extracted in step 4.7), publishes a
4847     * SUBQUERY-kind {@link RelationSource} so the inScope-enhanced
4848     * provider can route {@code sub.col} references.
4849     *
4850     * <p>Null-driver guard: probed PG
4851     * {@code DELETE FROM e USING (t1 JOIN t2 ON …)} returns
4852     * {@code refJoin[0].getTable() == null}. Silent skip mirrors
4853     * slice-82's null guard for the analogous UPDATE case;
4854     * documented as a known limitation (parenthesized JOIN-in-USING
4855     * is opaque to {@code relations[]} though WHERE refs still bind
4856     * via Resolver2).
4857     */
4858    private static void buildDeleteRelation(TTable t, TTable targetTable,
4859                                            List<RelationSource> relations,
4860                                            TDeleteSqlStatement delete,
4861                                            Map<String, Integer> cteNameToStatementIndex) {
4862        if (t == null) {
4863            return; // defensive — parenthesized JOIN-in-USING surfaces null
4864        }
4865        if (t.getTableType() == gudusoft.gsqlparser.ETableSource.subquery) {
4866            // Slice 84 — admit FROM-side subqueries. The inner SELECT
4867            // has already been extracted as its own StatementGraph by
4868            // extractDeleteFromSubqueries. Publish a SUBQUERY-kind
4869            // RelationSource so the inScope-enhanced provider routes
4870            // `sub.col` references correctly. Mirrors slice-83
4871            // buildUpdateRelation's subquery branch.
4872            String subAlias = effectiveAliasOf(t);
4873            if (subAlias != null && !subAlias.isEmpty()) {
4874                relations.add(new RelationSource(subAlias,
4875                        new RelationBinding(RelationKind.SUBQUERY, subAlias)));
4876            }
4877            return;
4878        }
4879        if (t.getTableType() == gudusoft.gsqlparser.ETableSource.join) {
4880            // Defensive: TTable wrapping a TJoin. Not reached by any
4881            // observed parser path on supported dialects (slice-82
4882            // precedent — parenthesized JOIN-in-USING surfaces a null
4883            // driver, not a join-typed TTable). Distinct DiagnosticCode
4884            // per slice-80's message-text-discrimination contract.
4885            throw new SemanticIRBuildException(Diagnostic.error(
4886                    DiagnosticCode.DELETE_FROM_NESTED_JOIN_NOT_SUPPORTED,
4887                    "DELETE FROM source is a nested join wrapper; "
4888                            + "slice 84 admits simple table / subquery "
4889                            + "FROM sources only",
4890                    delete));
4891        }
4892        // Reference-identity filter: target's own TTable instance is
4893        // excluded from relations[]. Different TTable instances with
4894        // the same qualified name (e.g. MSSQL `DELETE FROM t FROM t
4895        // spqh JOIN sp` where target identity A and FROM-driver
4896        // identity B share name "t") both stay — the catalog-miss
4897        // WARN walker's pass-1-target-then-pass-2-relations ordering
4898        // (slice 83) deduplicates by qualified name.
4899        if (t == targetTable) {
4900            return;
4901        }
4902        TObjectName tName = t.getTableName();
4903        if (tName == null) {
4904            return; // defensive
4905        }
4906        // Slice 106 — FROM-side CTE detection. When the FROM-side table
4907        // is an objectname-typed reference whose bare name matches a
4908        // declared CTE in this DELETE's outer WITH clause, emit a
4909        // SUBQUERY-kind RelationSource pointing at the CTE statement
4910        // (mirrors slice-105 buildUpdateRelation). The slice-77 catalog-
4911        // miss WARN walker filters to RelationKind.TABLE so CTE-bound
4912        // relations are naturally skipped, even when the catalog also
4913        // declares the same name (slice-105 §G / §X precedent).
4914        //
4915        // Explicit objectname guard (codex round-1 NICE Q3): subquery /
4916        // join table types are handled by the early returns above; this
4917        // guard documents the contract and makes the branch resilient
4918        // if a future TTable type is added.
4919        if (cteNameToStatementIndex != null
4920                && !cteNameToStatementIndex.isEmpty()
4921                && t.getTableType()
4922                        == gudusoft.gsqlparser.ETableSource.objectname) {
4923            String bareName = tName.toString();
4924            if (bareName != null && !bareName.isEmpty()) {
4925                String bareNameLower = bareName.toLowerCase(Locale.ROOT);
4926                if (cteNameToStatementIndex.containsKey(bareNameLower)) {
4927                    String cteAlias = effectiveAliasOf(t);
4928                    if (cteAlias == null || cteAlias.isEmpty()) {
4929                        cteAlias = bareName;
4930                    }
4931                    relations.add(new RelationSource(cteAlias,
4932                            new RelationBinding(RelationKind.SUBQUERY, cteAlias)));
4933                    return;
4934                }
4935            }
4936        }
4937        relations.add(new RelationSource(effectiveAliasOf(t),
4938                new RelationBinding(RelationKind.TABLE, tName.toString())));
4939    }
4940
4941    /**
4942     * Slice 84 — process one {@link TJoinItem} for joined
4943     * {@link #buildDelete}. Mirrors slice-82 {@link #buildUpdateJoinItem}.
4944     * Applies the slice-84 reject contract for USING / NATURAL /
4945     * subquery-in-ON, processes the right-side table via
4946     * {@link #buildDeleteRelation}, and collects ON-clause column
4947     * refs into {@code joinRefs} via the shared
4948     * {@link #collectColumnRefs} helper.
4949     */
4950    private static void buildDeleteJoinItem(TJoinItem item, TTable targetTable,
4951                                            NameBindingProvider provider,
4952                                            List<RelationSource> relations,
4953                                            java.util.LinkedHashSet<ColumnRef> joinRefs,
4954                                            TDeleteSqlStatement delete,
4955                                            Map<String, Integer> cteNameToStatementIndex) {
4956        if (item == null) return;
4957        if (item.getUsingColumns() != null && item.getUsingColumns().size() > 0) {
4958            throw new SemanticIRBuildException(Diagnostic.error(
4959                    DiagnosticCode.DELETE_FROM_JOIN_USING_NOT_SUPPORTED,
4960                    "DELETE FROM join uses USING(...); slice 84 admits "
4961                            + "JOIN ON / CROSS JOIN / comma-FROM only",
4962                    item));
4963        }
4964        if (isNaturalJoinType(item.getJoinType())) {
4965            throw new SemanticIRBuildException(Diagnostic.error(
4966                    DiagnosticCode.DELETE_FROM_JOIN_NATURAL_NOT_SUPPORTED,
4967                    "DELETE FROM uses NATURAL JOIN; slice 84 admits "
4968                            + "JOIN ON / CROSS JOIN / comma-FROM only",
4969                    item));
4970        }
4971        // Right-side table: apply the same source-shape rejects +
4972        // identity filter as the driver table. Slice 106 — threads
4973        // cteNameToStatementIndex so right-side CTE refs (MSSQL
4974        // `FROM target t JOIN cte ON …`) get SUBQUERY-kind emission.
4975        buildDeleteRelation(item.getTable(), targetTable, relations, delete,
4976                cteNameToStatementIndex);
4977        // ON-clause refs: subquery rejects with slice-84 code;
4978        // window function reuses CLAUSE_WINDOW_FUNCTION_LEAK via the
4979        // shared helper. CROSS JOIN has no ON; skip the walk entirely.
4980        TExpression onCond = item.getOnCondition();
4981        if (onCond == null) return;
4982        if (containsAnySubqueryExpression(onCond)) {
4983            throw new SemanticIRBuildException(Diagnostic.error(
4984                    DiagnosticCode.DELETE_JOIN_ON_HAS_SUBQUERY_NOT_SUPPORTED,
4985                    "DELETE FROM JOIN ON condition contains a subquery; "
4986                            + "slice 84 admits scalar predicates only",
4987                    item));
4988        }
4989        rejectWindowFunctionInScope(onCond, "DELETE FROM JOIN ON");
4990        joinRefs.addAll(collectColumnRefs(onCond, provider));
4991    }
4992
4993    /**
4994     * Slice 84 — extract every FROM-side subquery in
4995     * {@code delete.getReferenceJoins()} as its own
4996     * {@link StatementGraph} appended to {@code stmts} before the
4997     * DELETE itself. Walks both the driver TTable of each TJoin AND
4998     * each JoinItem's right table. Returns an alias → stmts-index
4999     * map so the consuming DELETE can (a) build its in-scope column
5000     * map via {@link #buildDeleteInScopeMap}, and (b) bind
5001     * {@code sub.col} references in WHERE / ON via the
5002     * inScope-enhanced provider.
5003     *
5004     * <p>Mirrors slice-83 {@link #extractUpdateFromSubqueries} but
5005     * walks {@code delete.getReferenceJoins()} instead of
5006     * {@code update.getJoins()}. Reuses the SELECT-side
5007     * {@link #processDirectSubqueryTable} verbatim, forwarding the
5008     * outer-WITH {@code cteNameToStatementIndex} +
5009     * {@code ctePublishedColumns} (slice 106) so a nested SELECT in
5010     * an extracted FROM-subquery body can resolve outer-CTE refs.
5011     * Pre-slice-106 the maps were always empty because slice 81
5012     * rejected top-level WITH on DELETE
5013     * ({@link DiagnosticCode#DELETE_CTE_NOT_SUPPORTED}).
5014     *
5015     * <p>No mutation-guard wrapper: buildDelete owns fresh local
5016     * lists and exceptions propagate cleanly to the caller.
5017     */
5018    private static Map<String, Integer> extractDeleteFromSubqueries(
5019            TDeleteSqlStatement delete,
5020            NameBindingProvider provider,
5021            List<StatementGraph> stmts,
5022            List<LineageEdge> lineage,
5023            Map<String, Integer> cteNameToStatementIndex,
5024            Map<String, List<String>> ctePublishedColumns) {
5025        Map<String, Integer> aliasToIndex = new HashMap<>();
5026        TJoinList refJoins = delete.getReferenceJoins();
5027        if (refJoins == null) return aliasToIndex;
5028        // Slice 106 — forward the outer-WITH CTE maps so a nested SELECT
5029        // inside an extracted FROM-subquery body can resolve outer-WITH
5030        // CTE references. Resolver2 wires CTEScope; the maps are
5031        // forwarded for parity with the SELECT / MERGE / UPDATE call
5032        // sites and so the §N test for
5033        // `USING (SELECT … FROM cte) sub` produces the expected
5034        // cross-stmt lineage edge to the CTE body.
5035        Map<String, Integer> cteMap = cteNameToStatementIndex == null
5036                ? Collections.<String, Integer>emptyMap()
5037                : cteNameToStatementIndex;
5038        Map<String, List<String>> ctePublished = ctePublishedColumns == null
5039                ? Collections.<String, List<String>>emptyMap()
5040                : ctePublishedColumns;
5041        for (int ji = 0; ji < refJoins.size(); ji++) {
5042            TJoin join = refJoins.getJoin(ji);
5043            // Driver table — may be a subquery (PG / SF / BQ / RS
5044            // `DELETE FROM t USING (SELECT …) sub` shape).
5045            processDirectSubqueryTable(join.getTable(), provider,
5046                    stmts, lineage, cteMap, ctePublished, aliasToIndex);
5047            TJoinItemList items = join.getJoinItems();
5048            if (items == null) continue;
5049            for (int i = 0; i < items.size(); i++) {
5050                TJoinItem item = items.getJoinItem(i);
5051                if (item == null) continue;
5052                // Right-side table of a JoinItem — may be a subquery
5053                // (MSSQL / PG `DELETE FROM t FROM x JOIN (SELECT …)
5054                // sub ON …` shape).
5055                processDirectSubqueryTable(item.getTable(), provider,
5056                        stmts, lineage, cteMap, ctePublished, aliasToIndex);
5057            }
5058        }
5059        return aliasToIndex;
5060    }
5061
5062    /**
5063     * Slice 84 — build an effective-alias-keyed in-scope map publishing
5064     * each extracted DELETE FROM-subquery's output column names.
5065     * Mirrors slice-83 {@link #buildUpdateInScopeMap} but walks
5066     * {@code delete.getReferenceJoins()}.
5067     *
5068     * <p>Base-table FROM-side relations do not need an entry: their
5069     * column resolution stays on the Resolver2 catalog path
5070     * (probed correct for PG / MSSQL DELETE — see slice-84 plan
5071     * §Codex Q4 + Q11).
5072     */
5073    private static Map<String, List<String>> buildDeleteInScopeMap(
5074            TDeleteSqlStatement delete,
5075            Map<String, Integer> subqueryAliasToIndex,
5076            List<StatementGraph> stmts,
5077            Map<String, Integer> cteNameToStatementIndex,
5078            Map<String, List<String>> ctePublishedColumns) {
5079        Map<String, List<String>> result = new HashMap<>();
5080        boolean haveSubq = subqueryAliasToIndex != null
5081                && !subqueryAliasToIndex.isEmpty();
5082        boolean haveCte = cteNameToStatementIndex != null
5083                && !cteNameToStatementIndex.isEmpty();
5084        if (!haveSubq && !haveCte) {
5085            return result;
5086        }
5087        TJoinList refJoins = delete.getReferenceJoins();
5088        if (refJoins == null) return result;
5089        for (int ji = 0; ji < refJoins.size(); ji++) {
5090            TJoin join = refJoins.getJoin(ji);
5091            addDeleteRelationToInScopeMap(join.getTable(),
5092                    subqueryAliasToIndex, stmts, result,
5093                    cteNameToStatementIndex, ctePublishedColumns);
5094            TJoinItemList items = join.getJoinItems();
5095            if (items == null) continue;
5096            for (int i = 0; i < items.size(); i++) {
5097                TJoinItem item = items.getJoinItem(i);
5098                if (item == null) continue;
5099                addDeleteRelationToInScopeMap(item.getTable(),
5100                        subqueryAliasToIndex, stmts, result,
5101                        cteNameToStatementIndex, ctePublishedColumns);
5102            }
5103        }
5104        return result;
5105    }
5106
5107    private static void addDeleteRelationToInScopeMap(TTable t,
5108            Map<String, Integer> subqueryAliasToIndex,
5109            List<StatementGraph> stmts,
5110            Map<String, List<String>> result,
5111            Map<String, Integer> cteNameToStatementIndex,
5112            Map<String, List<String>> ctePublishedColumns) {
5113        if (t == null) return;
5114        // Slice 106 — CTE-as-FROM-relation in-scope publication. When
5115        // the FROM-side table is an objectname-typed reference whose
5116        // bare name matches a declared outer CTE, publish the CTE's
5117        // own column names against the FROM-side effective alias so
5118        // WHERE / ON / RETURNING refs against the CTE alias bind
5119        // correctly. Mirrors slice-105 addUpdateRelationToInScopeMap.
5120        if (cteNameToStatementIndex != null
5121                && !cteNameToStatementIndex.isEmpty()
5122                && ctePublishedColumns != null
5123                && t.getTableType()
5124                        == gudusoft.gsqlparser.ETableSource.objectname) {
5125            TObjectName tName = t.getTableName();
5126            if (tName != null) {
5127                String bare = tName.toString();
5128                if (bare != null && !bare.isEmpty()) {
5129                    String bareLower = bare.toLowerCase(Locale.ROOT);
5130                    if (cteNameToStatementIndex.containsKey(bareLower)) {
5131                        String aliasKey = effectiveAliasLowerCaseOrNull(t);
5132                        if (aliasKey == null) aliasKey = bareLower;
5133                        List<String> cols = ctePublishedColumns.get(bareLower);
5134                        if (cols != null) {
5135                            result.put(aliasKey, cols);
5136                        }
5137                        return;
5138                    }
5139                }
5140            }
5141        }
5142        if (t.getTableType() != gudusoft.gsqlparser.ETableSource.subquery) {
5143            return;
5144        }
5145        if (subqueryAliasToIndex == null) {
5146            return;
5147        }
5148        String key = effectiveAliasLowerCaseOrNull(t);
5149        if (key == null) return;
5150        Integer idx = subqueryAliasToIndex.get(key);
5151        if (idx == null) return;
5152        result.put(key, outputColumnNames(stmts.get(idx)));
5153    }
5154
5155    /**
5156     * Slice 92 — returns {@code true} when the MySQL DELETE statement is a
5157     * self-reference single-target form ({@code DELETE T1 FROM T1 [WHERE …]})
5158     * that is semantically equivalent to {@code DELETE FROM T1 [WHERE …]}.
5159     *
5160     * <p>The check requires ALL of the following (codex plan-review Q1+Q5
5161     * BLOCKING fix — checking only {@code joins[0]} is insufficient because
5162     * {@code DELETE T1 FROM T2} also has {@code joins.size==1} and
5163     * {@code joins[0].table=="T1"==targetQName}, yet the FROM clause points
5164     * to a different table):
5165     * <ol>
5166     *   <li>{@code joins.size == 1} — exactly one MySQL target list entry.</li>
5167     *   <li>{@code getReferenceJoins().size() == 1} — exactly one FROM clause
5168     *       table.</li>
5169     *   <li>{@code joins[0]} has no JoinItems — must be a plain table, not a
5170     *       JOIN chain.</li>
5171     *   <li>{@code refJoins[0]} has no JoinItems — same constraint.</li>
5172     *   <li>{@code joins[0].table.name.toLowerCase() == targetQName.toLowerCase()}.
5173     *   </li>
5174     *   <li>{@code refJoins[0].table.name.toLowerCase() == targetQName.toLowerCase()}.
5175     *   </li>
5176     * </ol>
5177     */
5178    private static boolean isMysqlSelfReferenceDelete(
5179            TDeleteSqlStatement delete, String targetQName) {
5180        if (delete.joins == null || delete.joins.size() != 1) return false;
5181        TJoinList ref = delete.getReferenceJoins();
5182        if (ref == null || ref.size() != 1) return false;
5183        TJoin join0 = delete.joins.getJoin(0);
5184        if (join0.getJoinItems() != null && join0.getJoinItems().size() > 0) {
5185            return false;
5186        }
5187        TJoin ref0 = ref.getJoin(0);
5188        if (ref0.getJoinItems() != null && ref0.getJoinItems().size() > 0) {
5189            return false;
5190        }
5191        TTable joinTable = join0.getTable();
5192        if (joinTable == null || joinTable.getTableName() == null) return false;
5193        TTable refTable = ref0.getTable();
5194        if (refTable == null || refTable.getTableName() == null) return false;
5195        String lowerTarget = targetQName.toLowerCase(java.util.Locale.ROOT);
5196        String joinName = joinTable.getTableName().toString()
5197                .toLowerCase(java.util.Locale.ROOT);
5198        String refName = refTable.getTableName().toString()
5199                .toLowerCase(java.util.Locale.ROOT);
5200
5201        // Codex diff-review P1 fix: MySQL allows the alias in the DELETE
5202        // target list instead of the table name:
5203        //   DELETE t FROM T1 AS t WHERE t.id = 1
5204        // In this form joins[0].table.name = "t" (the alias used in the
5205        // delete-list) while targetQName = "T1" (from getTargetTable()
5206        // which the parser resolves to the real table). Accept the target
5207        // table's alias as a valid joins[0] match alongside the table name.
5208        String targetAlias = null;
5209        if (delete.getTargetTable() != null
5210                && delete.getTargetTable().getAliasClause() != null
5211                && delete.getTargetTable().getAliasClause().getAliasName() != null) {
5212            String a = delete.getTargetTable().getAliasClause()
5213                    .getAliasName().toString();
5214            if (a != null && !a.isEmpty()) {
5215                targetAlias = a.toLowerCase(java.util.Locale.ROOT);
5216            }
5217        }
5218        boolean joinMatchesTarget = joinName.equals(lowerTarget)
5219                || (targetAlias != null && joinName.equals(targetAlias));
5220        // refJoins[0].table must always be the real table name (= targetQName).
5221        return joinMatchesTarget && refName.equals(lowerTarget);
5222    }
5223
5224    /**
5225     * Per-result-column metadata about an extracted scalar-subquery
5226     * projection (slice 11). {@link #statementIndex} points to the
5227     * inner body statement; {@link #innerOutputName} is the inner
5228     * SELECT's single projected output name (used to wire the
5229     * STATEMENT_OUTPUT → STATEMENT_OUTPUT lineage edge).
5230     */
5231    private static final class ScalarInfo {
5232        final int statementIndex;
5233        final String innerOutputName;
5234        ScalarInfo(int statementIndex, String innerOutputName) {
5235            this.statementIndex = statementIndex;
5236            this.innerOutputName = innerOutputName;
5237        }
5238    }
5239
5240    /**
5241     * Walk the consuming SELECT's FROM list. For every {@link TTable} of
5242     * type {@link gudusoft.gsqlparser.ETableSource#subquery}, recursively
5243     * build the inner statement, append it to {@code stmts}, emit its own
5244     * lineage edges, and record alias→statementIndex. The returned map is
5245     * scoped to this single consuming statement so duplicate aliases
5246     * across different scopes do not collide.
5247     *
5248     * <p>Slice 17: extraction now walks BOTH sides of every JOIN
5249     * ({@code TJoin.getTable()} for the left, {@code joinItems[i].getTable()}
5250     * for each right) and recurses into nested FROM-subquery bodies. Each
5251     * recursive level pre-extracts its own children before calling
5252     * {@code buildSelectStatement}, preserving the
5253     * {@code BodyIndexes}-required ordering (innermost body before
5254     * its consumer). FROM-subquery bodies still recurse with
5255     * {@code allowScalarProjectionSubqueries=false} (slice-15 invariant
5256     * pinned by {@code Slice15Test.scalarProjectionInsideFromSubqueryBodyStillRejected}).
5257     *
5258     * <p>Slice 18 lifts CTE bodies (the non-set-op CTE-body branch in
5259     * {@link #build} now invokes this extractor with
5260     * {@code allowFromSubqueries=true}). Still rejected: subqueries with
5261     * no alias, FROM-subqueries inside a scalar body / set-op branch /
5262     * set-op CTE body (each enforced by the caller's
5263     * {@code allowFromSubqueries=false}), and predicate subqueries inside
5264     * the FROM-subquery body's WHERE / JOIN ON / GROUP BY (slice-17
5265     * helper {@link #rejectSubqueriesInFromSubqueryBodyClauses}).
5266     */
5267    private static Map<String, Integer> extractFromSubqueriesAsStatements(
5268            TSelectSqlStatement consumer,
5269            NameBindingProvider consumerProvider,
5270            List<StatementGraph> stmts,
5271            List<LineageEdge> lineage,
5272            Map<String, Integer> cteNameToStatementIndex,
5273            Map<String, List<String>> ctePublishedColumns) {
5274        Map<String, Integer> aliasToIndex = new HashMap<>();
5275        if (consumer.joins == null) return aliasToIndex;
5276        // Slice 17 mutation-free preflight: walk the entire direct
5277        // FROM/JOIN list once and reject before any mutation of
5278        // stmts/lineage. Catches comma-FROM, anonymous subqueries,
5279        // unsupported join shapes, and ALL same-level alias collisions
5280        // (base AND subquery, since rejectDuplicateAliases inside
5281        // buildRelations only catches them later, after this level's
5282        // subquery body has already landed in stmts).
5283        preflightDirectFromList(consumer);
5284
5285        // Slice 17: walk both sides of every join and process each
5286        // direct subquery via the same helper so left/right can't drift.
5287        for (TJoin join : consumer.joins) {
5288            processDirectSubqueryTable(join.getTable(),
5289                    consumerProvider, stmts, lineage,
5290                    cteNameToStatementIndex, ctePublishedColumns, aliasToIndex);
5291            TJoinItemList items = join.getJoinItems();
5292            if (items == null) continue;
5293            for (int i = 0; i < items.size(); i++) {
5294                TJoinItem item = items.getJoinItem(i);
5295                if (item == null) continue;
5296                processDirectSubqueryTable(item.getTable(),
5297                        consumerProvider, stmts, lineage,
5298                        cteNameToStatementIndex, ctePublishedColumns, aliasToIndex);
5299            }
5300        }
5301        return aliasToIndex;
5302    }
5303
5304    /**
5305     * Slice 17 mutation-free preflight for the consumer's direct
5306     * FROM/JOIN list. Validates structural invariants BEFORE any
5307     * subquery body is appended to {@code stmts} so a deferred
5308     * failure (e.g. on the second of two siblings) doesn't strand
5309     * earlier-sibling output in the program.
5310     *
5311     * <p>Slice 62 (codex plan-review round 1): the comma-FROM
5312     * reject was removed here. The preflight runs only for
5313     * {@code allowFromSubqueries=true} paths (outer SELECT, CTE
5314     * body, FROM-subquery body recursion) — exactly the paths
5315     * that admit comma-FROM under slice 62. Synthetic body
5316     * contexts (scalar / set-op-branch / set-op-CTE / predicate)
5317     * do not run this preflight; they reach the gated reject in
5318     * {@link #buildRelations} (and predicate bodies hit the
5319     * earlier slice-62 reject inside
5320     * {@link #preflightExistsInnerShape}).
5321     */
5322    private static void preflightDirectFromList(TSelectSqlStatement consumer) {
5323        if (consumer.joins == null) return;
5324        Set<String> seenSubqueryAliases = new HashSet<>();
5325        Set<String> seenAllAliases = new HashSet<>();
5326        for (TJoin join : consumer.joins) {
5327            preflightOneTable(join.getTable(), seenSubqueryAliases, seenAllAliases);
5328            TJoinItemList items = join.getJoinItems();
5329            if (items == null) continue;
5330            for (int i = 0; i < items.size(); i++) {
5331                TJoinItem item = items.getJoinItem(i);
5332                if (item == null) continue;
5333                rejectUnsupportedJoinShape(item);
5334                preflightOneTable(item.getTable(), seenSubqueryAliases, seenAllAliases);
5335            }
5336        }
5337    }
5338
5339    /**
5340     * Slice 17: validate one direct FROM/JOIN-list TTable in the
5341     * mutation-free preflight. Effective alias is the SQL-written alias
5342     * if present, else the slice-74 synthetic alias for unaliased
5343     * FROM-subqueries (position-keyed), else the table name (matches
5344     * {@link #buildRelation}).
5345     *
5346     * <p>Slice 74: removed the {@code FROM_SUBQUERY_ALIAS_REQUIRED} reject
5347     * for anonymous subqueries; the slot is now filled by
5348     * {@link FromSubqueryNaming#synthAliasFor}. Two unaliased subqueries
5349     * at the same source location are theoretically impossible (the
5350     * parser would have to emit the same start token for both), but if
5351     * it ever happens the {@code DUPLICATE_FROM_SUBQUERY_ALIAS} branch
5352     * below catches it the same way as a literal user-written duplicate.
5353     *
5354     * <p>Still rejects: duplicate subquery aliases (whether user-written
5355     * or synthetic by collision), and any cross-kind alias collision
5356     * (base alias colliding with a subquery alias).
5357     */
5358    private static void preflightOneTable(TTable t,
5359                                          Set<String> seenSubqueryAliases,
5360                                          Set<String> seenAllAliases) {
5361        if (t == null) return;
5362        boolean isSub = t.getTableType() == gudusoft.gsqlparser.ETableSource.subquery;
5363        String effective = effectiveAliasOf(t);
5364        if (effective == null || effective.isEmpty()) return;
5365        String lower = effective.toLowerCase(Locale.ROOT);
5366        if (isSub && !seenSubqueryAliases.add(lower)) {
5367            throw new SemanticIRBuildException(
5368                    Diagnostic.error(DiagnosticCode.DUPLICATE_FROM_SUBQUERY_ALIAS,
5369                    "duplicate FROM-clause subquery alias '" + effective + "'", (TParseTreeNode) null));
5370        }
5371        if (!seenAllAliases.add(lower)) {
5372            throw new SemanticIRBuildException(
5373                    Diagnostic.error(DiagnosticCode.DUPLICATE_RELATION_ALIAS,
5374                    "duplicate relation alias '" + effective
5375                            + "' is not supported (would make ColumnRef ambiguous)", (TParseTreeNode) null));
5376        }
5377    }
5378
5379    /**
5380     * Slice 17: extract one direct subquery TTable as its own
5381     * StatementGraph. Recurses into the inner SELECT first
5382     * (innermost body lands in {@code stmts} BEFORE its consumer, as
5383     * {@code BodyIndexes} requires). Skips non-subquery tables (base
5384     * relations are bound later by {@code buildRelations}).
5385     */
5386    private static void processDirectSubqueryTable(
5387            TTable t,
5388            NameBindingProvider consumerProvider,
5389            List<StatementGraph> stmts,
5390            List<LineageEdge> lineage,
5391            Map<String, Integer> cteNameToStatementIndex,
5392            Map<String, List<String>> ctePublishedColumns,
5393            Map<String, Integer> aliasToIndex) {
5394        if (t == null) return;
5395        if (t.getTableType() != gudusoft.gsqlparser.ETableSource.subquery) return;
5396        // Alias presence/uniqueness already validated by the preflight.
5397        // Slice 74: anonymous (unaliased) subqueries get a synth name
5398        // from FromSubqueryNaming via effectiveAliasOf so the alias used
5399        // for the aliasToIndex map and inner-stmt name is non-null.
5400        String alias = effectiveAliasOf(t);
5401        String aliasLower = alias.toLowerCase(Locale.ROOT);
5402        TSelectSqlStatement inner = t.getSubquery();
5403        if (inner == null) {
5404            throw new SemanticIRBuildException(
5405                    Diagnostic.error(DiagnosticCode.FROM_SUBQUERY_NO_INNER_SELECT,
5406                    "FROM-clause subquery '" + alias + "' has no inner SELECT", (TParseTreeNode) null));
5407        }
5408        // Slice 17 leak guard: predicate subqueries inside the
5409        // FROM-subquery body's WHERE / JOIN ON / GROUP BY would
5410        // otherwise slip past `allowScalarProjectionSubqueries=false`
5411        // (which only guards buildOutputColumns) and leak inner refs
5412        // into the body's filter/join/group ref lists.
5413        rejectSubqueriesInFromSubqueryBodyClauses(inner, alias);
5414        // Recurse into the inner's own FROM-subqueries first so each
5415        // deeper body lands in stmts BEFORE the body that consumes it.
5416        // The recursive call uses `consumerProvider` because the inner
5417        // sees the same CTE-name set as the outer (CTEs are visible
5418        // through FROM-subquery bodies — pinned by
5419        // Slice5Test.cteVisibleInsideFromSubquery).
5420        // Slice 60: thread ctePublishedColumns down unchanged. The
5421        // inner's siblings get registered into innerSubAliasToIndex
5422        // here; below we build the per-level innerInScope BEFORE
5423        // calling buildSelectStatement.
5424        Map<String, Integer> innerSubAliasToIndex =
5425                extractFromSubqueriesAsStatements(inner, consumerProvider,
5426                        stmts, lineage, cteNameToStatementIndex,
5427                        ctePublishedColumns);
5428        // Slice 60 (codex diff-review): build the inner FROM-subquery
5429        // body's effective-alias-keyed in-scope map by walking the
5430        // inner SELECT's FROM list. Sibling isolation is preserved
5431        // because `innerSubAliasToIndex` contains ONLY this body's
5432        // own children — ancestor siblings are never visited by the
5433        // walk because they're not in the inner's FROM list.
5434        Map<String, List<String>> innerInScope = buildEffectiveAliasInScopeMap(
5435                inner, consumerProvider, ctePublishedColumns,
5436                innerSubAliasToIndex, stmts);
5437        NameBindingProvider innerProviderWithStar = consumerProvider
5438                .withInScopeRelationColumns(innerInScope);
5439        // Slice 120 — switch from the 7-arg buildSelectStatement to the
5440        // 14-arg buildSelectStatementImpl so the FROM-subquery body's
5441        // WHERE clause can extract uncorrelated predicate subqueries
5442        // (IN-SELECT / EXISTS / NOT EXISTS / scalar comparison /
5443        // ANY-ALL-SOME) as their own statements (mirrors the slice-114
5444        // CTE-body lift). JOIN-ON predicate subqueries stay rejected (the
5445        // two flags are independent per the slice-113 split) — the
5446        // slice-17 leak guard rejectSubqueriesInFromSubqueryBodyClauses
5447        // above still fires for the body's JOIN-ON / GROUP-BY clauses.
5448        // allowFromSubqueries=true so its buildRelations accepts
5449        // already-extracted subquery aliases; allowScalarProjectionSubqueries
5450        // =false (slice-15 invariant). The snapshot/rollback wrapper
5451        // mirrors the slice-114 CTE-body call site: if the build appends
5452        // predicate bodies and then a later reject fires, stmts/lineage
5453        // truncate back to the pre-call boundary so a partial extraction
5454        // does not leak into the program. processDirectSubqueryTable is
5455        // shared by the SELECT / UPDATE (slice 83) / DELETE (slice 84)
5456        // FROM-subquery extractors, so this single site lifts all three.
5457        int fromBodyStmtsSnapshot = stmts.size();
5458        int fromBodyLineageSnapshot = lineage.size();
5459        StatementGraph innerStmt;
5460        try {
5461            innerStmt = buildSelectStatementImpl(inner, innerProviderWithStar, alias,
5462                    /*hasOuterCteListAlreadyProcessed=*/ false,
5463                    /*allowFromSubqueries=*/ true,
5464                    /*allowScalarProjectionSubqueries=*/ false,
5465                    /*allowWindowProjection=*/ true,
5466                    /*allowJoinOnPredicateSubqueries=*/ false,
5467                    /*stmtsForExtraction=*/ stmts,
5468                    /*lineageForExtraction=*/ lineage,
5469                    /*cteMapForExtraction=*/ cteNameToStatementIndex,
5470                    /*isPredicateBody=*/ false,
5471                    /*whereClauseContext=*/ PredicateClauseContext.FROM_SUBQUERY_BODY_WHERE,
5472                    /*allowWherePredicateSubqueries=*/ true);
5473        } catch (RuntimeException ex) {
5474            while (stmts.size() > fromBodyStmtsSnapshot) stmts.remove(stmts.size() - 1);
5475            while (lineage.size() > fromBodyLineageSnapshot) lineage.remove(lineage.size() - 1);
5476            throw ex;
5477        }
5478        int idx = stmts.size();
5479        stmts.add(innerStmt);
5480        aliasToIndex.put(aliasLower, idx);
5481        // Emit lineage with the inner's own subquery alias map (so
5482        // STATEMENT_OUTPUT → STATEMENT_OUTPUT edges target the inner's
5483        // children, not the outer's). Scalar map stays empty because
5484        // FROM-subquery bodies still reject scalar projections
5485        // (slice-15 invariant).
5486        emitLineageForStatement(innerStmt, idx, lineage,
5487                cteNameToStatementIndex,
5488                innerSubAliasToIndex,
5489                Collections.<Integer, ScalarInfo>emptyMap());
5490    }
5491
5492    /**
5493     * Slice 17 leak guard: reject subqueries inside a FROM-subquery
5494     * body's JOIN ON / GROUP BY clauses. Mirrors
5495     * {@link #rejectSubqueriesInScalarBodyClauses} (slice 11) — the
5496     * {@code allowScalarProjectionSubqueries=false} flag only guards
5497     * {@code buildOutputColumns}, so without this helper a SQL like
5498     * {@code SELECT id FROM (SELECT id FROM e JOIN d ON e.x = d.x
5499     * AND EXISTS (...)) sub} would leak the EXISTS subquery's refs into
5500     * the body's join column refs via {@code collectColumnRefs}.
5501     * HAVING / ORDER BY subqueries are caught by the slice-9 / 10
5502     * deep-scan rejecters inside {@code buildSelectStatementImpl}.
5503     *
5504     * <p>Slice 120 — the WHERE branch was removed: uncorrelated WHERE-side
5505     * predicate subqueries in a FROM-subquery body are now extracted as
5506     * their own statements by {@code buildSelectStatementImpl} via
5507     * {@link PredicateClauseContext#FROM_SUBQUERY_BODY_WHERE} (see
5508     * {@link #processDirectSubqueryTable}). {@code FROM_SUBQUERY_INNER_SUBQUERY_IN_WHERE}
5509     * stays declared-but-unreached for public-API stability (slice
5510     * 71/72/82/86/95/101/.../114 retain-for-documentation precedent).
5511     */
5512    private static void rejectSubqueriesInFromSubqueryBodyClauses(
5513            TSelectSqlStatement inner, String fromAlias) {
5514        if (inner.joins != null) {
5515            for (TJoin join : inner.joins) {
5516                TJoinItemList items = join.getJoinItems();
5517                if (items == null) continue;
5518                for (int i = 0; i < items.size(); i++) {
5519                    TJoinItem item = items.getJoinItem(i);
5520                    TExpression onCond = item == null ? null : item.getOnCondition();
5521                    if (onCond != null && containsAnySubqueryExpression(onCond)) {
5522                        throw new SemanticIRBuildException(
5523                                Diagnostic.error(DiagnosticCode.FROM_SUBQUERY_INNER_SUBQUERY_IN_JOIN_ON,
5524                                "FROM-clause subquery '" + fromAlias
5525                                        + "' has a subquery in a JOIN ON clause; not supported yet "
5526                                        + "(would leak inner refs)", (TParseTreeNode) null));
5527                    }
5528                }
5529            }
5530        }
5531        TGroupBy groupBy = inner.getGroupByClause();
5532        if (groupBy != null) {
5533            TGroupByItemList items = groupBy.getItems();
5534            if (items != null && containsAnySubquery(items)) {
5535                throw new SemanticIRBuildException(
5536                        Diagnostic.error(DiagnosticCode.FROM_SUBQUERY_INNER_SUBQUERY_IN_GROUP_BY,
5537                        "FROM-clause subquery '" + fromAlias
5538                                + "' has a subquery in a GROUP BY clause; not supported yet "
5539                                + "(would leak inner refs)", (TParseTreeNode) null));
5540            }
5541        }
5542    }
5543
5544    /**
5545     * Walk the consuming SELECT's result-column list. For every
5546     * top-level {@link EExpressionType#subquery_t} projection, build
5547     * the inner SELECT as its own {@link StatementGraph} (mirroring
5548     * slice 5 FROM-subquery extraction), append it to {@code stmts},
5549     * emit its own lineage edges, and record
5550     * {@code resultColumnOrdinal → ScalarInfo} so the consumer's
5551     * {@code emitLineageForStatement} can wire the
5552     * STATEMENT_OUTPUT → STATEMENT_OUTPUT edge.
5553     *
5554     * <p>Slice 11 disallows: scalar subqueries with no outer alias,
5555     * inner SELECTs that project more than one column, inner columns
5556     * with no alias and no direct column name, scalar subqueries
5557     * whose inner WHERE/JOIN/GROUP BY contains a subquery (predicate
5558     * leak guard), correlated scalar subqueries (inner refs that
5559     * resolve to outer aliases), and nested scalar subqueries.
5560     *
5561     * <p>Slice 14 lifted correlated TABLE-bound; slice 15 lifted CTE-
5562     * and SUBQUERY-bound. Slice 20 lifts <i>nested</i> scalar
5563     * projections inside a scalar body when the
5564     * {@code allowRecursiveScalarSubqueryExtraction} flag is true (passed
5565     * by the outer-build and CTE-body call sites). Set-op-branch call
5566     * sites pass false to keep the slice-12 / slice-16 boundary that
5567     * branch scalar bodies must not host another scalar projection.
5568     *
5569     * <p>Slice 20 wraps the body in a snapshot/rollback so a deeper
5570     * level's failure does not leak appended scalar-body statements at
5571     * shallower levels into {@code stmts}/{@code lineage}. The wrapper
5572     * mirrors slice-16's {@code buildSetOpProgram} and slice-17/18's
5573     * extract wrappers (§14.18 process lesson #21).
5574     */
5575    private static Map<Integer, ScalarInfo> extractScalarSubqueriesAsStatements(
5576            TSelectSqlStatement consumer,
5577            NameBindingProvider consumerProvider,
5578            List<StatementGraph> stmts,
5579            List<LineageEdge> lineage,
5580            Map<String, Integer> cteNameToStatementIndex,
5581            EnclosingScope enclosingScope,
5582            boolean allowRecursiveScalarSubqueryExtraction) {
5583        // Slice 20: SET-OP-WIDE-style transactional rollback. A failure
5584        // anywhere inside the loop (or inside a recursive call) truncates
5585        // both lists back to the pre-extraction size. The wrapper closes
5586        // the class of "mutation-free check fires after partial mutation"
5587        // (codex round-3..5 finding on slice 16); the recursive scalar
5588        // extraction surfaced it here.
5589        int stmtsSnapshot = stmts.size();
5590        int lineageSnapshot = lineage.size();
5591        try {
5592            return extractScalarSubqueriesAsStatementsInternal(consumer,
5593                    consumerProvider, stmts, lineage,
5594                    cteNameToStatementIndex, enclosingScope,
5595                    allowRecursiveScalarSubqueryExtraction);
5596        } catch (RuntimeException ex) {
5597            while (stmts.size() > stmtsSnapshot) stmts.remove(stmts.size() - 1);
5598            while (lineage.size() > lineageSnapshot) lineage.remove(lineage.size() - 1);
5599            throw ex;
5600        }
5601    }
5602
5603    /**
5604     * Internal body of {@link #extractScalarSubqueriesAsStatements}.
5605     * Wrapped with snapshot/rollback by the public entry point; do not
5606     * call directly from non-wrapper sites.
5607     */
5608    private static Map<Integer, ScalarInfo> extractScalarSubqueriesAsStatementsInternal(
5609            TSelectSqlStatement consumer,
5610            NameBindingProvider consumerProvider,
5611            List<StatementGraph> stmts,
5612            List<LineageEdge> lineage,
5613            Map<String, Integer> cteNameToStatementIndex,
5614            EnclosingScope enclosingScope,
5615            boolean allowRecursiveScalarSubqueryExtraction) {
5616        Map<Integer, ScalarInfo> ordinalToInfo = new HashMap<>();
5617        TResultColumnList rcl = consumer.getResultColumnList();
5618        if (rcl == null || rcl.size() == 0) return ordinalToInfo;
5619
5620        // Reject duplicate output aliases when a scalar projection is
5621        // present (codex impl-review round-1 SHOULD 1). Lineage refs are
5622        // keyed by (statementIndex, outputName); two outputs sharing
5623        // the same name would collapse their lineage chains and
5624        // silently merge the scalar dependency with another column's
5625        // dependency. The slice-11 boundary is the cleanest place to
5626        // enforce this since the issue is most acute when a scalar
5627        // body's STATEMENT_OUTPUT → STATEMENT_OUTPUT edge is in play.
5628        boolean hasScalar = false;
5629        for (int i = 0; i < rcl.size(); i++) {
5630            TResultColumn rc = rcl.getResultColumn(i);
5631            if (rc != null && rc.getExpr() != null
5632                    && rc.getExpr().getExpressionType() == EExpressionType.subquery_t) {
5633                hasScalar = true;
5634                break;
5635            }
5636        }
5637        if (hasScalar) {
5638            Set<String> seenOutputNames = new HashSet<>();
5639            for (int i = 0; i < rcl.size(); i++) {
5640                TResultColumn rc = rcl.getResultColumn(i);
5641                if (rc == null) continue;
5642                String alias = rc.getColumnAlias();
5643                String colName = rc.getColumnNameOnly();
5644                String name = (alias != null && !alias.isEmpty())
5645                        ? alias
5646                        : colName;
5647                if (name == null || name.isEmpty()) continue;
5648                String lower = name.toLowerCase(Locale.ROOT);
5649                if (!seenOutputNames.add(lower)) {
5650                    throw new SemanticIRBuildException(
5651                            Diagnostic.error(DiagnosticCode.DUPLICATE_OUTPUT_NAME,
5652                            "duplicate output name '" + name
5653                                    + "' in a SELECT containing a scalar subquery projection; "
5654                                    + "lineage refs are keyed by output name and would collide", rc));
5655                }
5656            }
5657        }
5658
5659        for (int i = 0; i < rcl.size(); i++) {
5660            TResultColumn rc = rcl.getResultColumn(i);
5661            if (rc == null || rc.getExpr() == null) continue;
5662            if (rc.getExpr().getExpressionType() != EExpressionType.subquery_t) {
5663                continue;
5664            }
5665            String outerAlias = rc.getColumnAlias();
5666            if (outerAlias == null || outerAlias.isEmpty()) {
5667                throw new SemanticIRBuildException(
5668                        Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_ALIAS_REQUIRED,
5669                        "scalar subquery projection must have an alias", rc));
5670            }
5671            TSelectSqlStatement inner = rc.getExpr().getSubQuery();
5672            if (inner == null) {
5673                throw new SemanticIRBuildException(
5674                        Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_NO_INNER_SELECT,
5675                        "scalar subquery projection '" + outerAlias
5676                                + "' has no inner SELECT", rc));
5677            }
5678            // Pre-recursion validation (codex round-2 MUST 2): inspect
5679            // the inner SELECT's projected column count and naming
5680            // BEFORE recursive build so the rejection message is
5681            // scalar-specific instead of bubbling up from
5682            // effectiveOutputName.
5683            TResultColumnList innerRcl = inner.getResultColumnList();
5684            if (innerRcl == null || innerRcl.size() == 0) {
5685                throw new SemanticIRBuildException(
5686                        Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_COLUMN_COUNT,
5687                        "scalar subquery '" + outerAlias
5688                                + "' must project exactly one column, got 0", rc));
5689            }
5690            if (innerRcl.size() != 1) {
5691                throw new SemanticIRBuildException(
5692                        Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_COLUMN_COUNT,
5693                        "scalar subquery '" + outerAlias
5694                                + "' must project exactly one column, got "
5695                                + innerRcl.size(), rc));
5696            }
5697            TResultColumn innerCol = innerRcl.getResultColumn(0);
5698            String innerAlias = innerCol.getColumnAlias();
5699            String innerColName = innerCol.getColumnNameOnly();
5700            boolean innerHasName =
5701                    (innerAlias != null && !innerAlias.isEmpty())
5702                            || (innerColName != null && !innerColName.isEmpty());
5703            if (!innerHasName && !isConstantExpression(innerCol.getExpr())) {
5704                throw new SemanticIRBuildException(
5705                        Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_INNER_PROJECTION_UNNAMED,
5706                        "scalar subquery '" + outerAlias
5707                                + "' inner projection has no alias and no column name; "
5708                                + "add an explicit alias inside the subquery", rc));
5709            }
5710            // Pre-recursion deep-scan (codex round-3 MUST 5, round-4
5711            // MUST 1): reject nested predicate subqueries in the
5712            // scalar body's WHERE / JOIN ON / GROUP BY before
5713            // collectColumnRefs can descend.
5714            rejectSubqueriesInScalarBodyClauses(inner, outerAlias);
5715
5716            // Slice 20: branch on allowRecursiveScalarSubqueryExtraction.
5717            // - true (outer / CTE-body call sites): build the inner's
5718            //   own enclosing scope chained to this caller's; recursively
5719            //   extract the inner's scalar projections; then build the
5720            //   inner with allowScalarProjectionSubqueries=true;
5721            //   compute scalarName AFTER the recursive extraction so
5722            //   the digit suffix matches the post-extraction stmts.size()
5723            //   (slice-16 codex round-1 MUST 2 lesson).
5724            // - false (set-op-branch call site): keep the slice-12 /
5725            //   slice-16 boundary — no recursive extraction; the inner
5726            //   scalar map stays empty; the inner builds with
5727            //   allowScalarProjectionSubqueries=false; promotion still
5728            //   uses the caller's enclosing scope so OUTER_REFERENCE-of-*
5729            //   correlation works at the branch level.
5730            EnclosingScope innerEnclosing;
5731            Map<Integer, ScalarInfo> innerScalarMap;
5732            String scalarName;
5733            if (allowRecursiveScalarSubqueryExtraction) {
5734                innerEnclosing = buildEnclosingScope(inner,
5735                        cteNameToStatementIndex,
5736                        Collections.<String, Integer>emptyMap(),
5737                        enclosingScope);
5738                innerScalarMap = extractScalarSubqueriesAsStatements(inner,
5739                        consumerProvider, stmts, lineage,
5740                        cteNameToStatementIndex, innerEnclosing,
5741                        /*allowRecursiveScalarSubqueryExtraction=*/ true);
5742                scalarName = SCALAR_BODY_PREFIX + stmts.size() + ">";
5743            } else {
5744                innerEnclosing = enclosingScope;
5745                innerScalarMap = Collections.<Integer, ScalarInfo>emptyMap();
5746                scalarName = SCALAR_BODY_PREFIX + stmts.size() + ">";
5747            }
5748            StatementGraph innerStmt = buildSelectStatement(inner, consumerProvider,
5749                    scalarName,
5750                    /*hasOuterCteListAlreadyProcessed=*/ false,
5751                    /*allowFromSubqueries=*/ false,
5752                    /*allowScalarProjectionSubqueries=*/ allowRecursiveScalarSubqueryExtraction,
5753                    /*allowWindowProjection=*/ false);
5754            // Slice 14: instead of rejecting correlated scalar
5755            // subqueries, promote outer-scope refs into synthesised
5756            // OUTER_REFERENCE relations on the inner statement.
5757            // Non-TABLE-bound outer refs (CTE / SUBQUERY) and
5758            // unknown aliases still throw.
5759            innerStmt = promoteCorrelatedRefsToOuterReference(
5760                    innerStmt, outerAlias, innerEnclosing);
5761            int idx = stmts.size();
5762            stmts.add(innerStmt);
5763            String innerOutName = effectiveOutputName(innerCol);
5764            ordinalToInfo.put(i, new ScalarInfo(idx, innerOutName));
5765            // Slice 20: pass the chained subquery alias map so
5766            // OUTER_REFERENCE-of-SUBQUERY refs in deeply nested scalar
5767            // bodies resolve through ancestor FROM-subquery aliases.
5768            // Pass innerScalarMap (non-empty when recursive extraction
5769            // is allowed) so the inner's own STATEMENT_OUTPUT →
5770            // STATEMENT_OUTPUT edges land.
5771            emitLineageForStatement(innerStmt, idx, lineage,
5772                    cteNameToStatementIndex,
5773                    innerEnclosing.flattenSubqueryAliasToIndex(),
5774                    innerScalarMap);
5775        }
5776        return ordinalToInfo;
5777    }
5778
5779    /**
5780     * Reject nested predicate subqueries inside a scalar body's
5781     * WHERE / JOIN ON / GROUP BY clauses. Scalar bodies are slice-11
5782     * scope; the pre-existing builder leaks predicate-subquery refs
5783     * into {@code filterColumnRefs}/etc. elsewhere, but the scalar-body
5784     * recursion path is guarded so slice-11 outputs stay clean.
5785     */
5786    private static void rejectSubqueriesInScalarBodyClauses(
5787            TSelectSqlStatement inner, String outerAlias) {
5788        TWhereClause where = inner.getWhereClause();
5789        if (where != null && containsAnySubquery(where)) {
5790            throw new SemanticIRBuildException(
5791                    Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_INNER_SUBQUERY_IN_WHERE,
5792                    "scalar subquery '" + outerAlias
5793                            + "' has a subquery in its WHERE clause; not supported yet "
5794                            + "(would leak inner refs)", (TParseTreeNode) null));
5795        }
5796        if (inner.joins != null) {
5797            for (TJoin join : inner.joins) {
5798                TJoinItemList items = join.getJoinItems();
5799                if (items == null) continue;
5800                for (int i = 0; i < items.size(); i++) {
5801                    TJoinItem item = items.getJoinItem(i);
5802                    TExpression onCond = item == null ? null : item.getOnCondition();
5803                    if (onCond != null && containsAnySubqueryExpression(onCond)) {
5804                        throw new SemanticIRBuildException(
5805                                Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_INNER_SUBQUERY_IN_JOIN_ON,
5806                                "scalar subquery '" + outerAlias
5807                                        + "' has a subquery in a JOIN ON clause; not supported yet "
5808                                        + "(would leak inner refs)", (TParseTreeNode) null));
5809                    }
5810                }
5811            }
5812        }
5813        TGroupBy groupBy = inner.getGroupByClause();
5814        if (groupBy != null) {
5815            TGroupByItemList items = groupBy.getItems();
5816            if (items != null && containsAnySubquery(items)) {
5817                throw new SemanticIRBuildException(
5818                        Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_INNER_SUBQUERY_IN_GROUP_BY,
5819                        "scalar subquery '" + outerAlias
5820                                + "' has a subquery in a GROUP BY clause; not supported yet "
5821                                + "(would leak inner refs)", (TParseTreeNode) null));
5822            }
5823        }
5824        // HAVING / ORDER BY subqueries are caught by the slice-9 / 10
5825        // deep-scan rejecters that fire during buildSelectStatement.
5826    }
5827
5828    /**
5829     * True iff any descendant of {@code root} is a {@link TExpression}
5830     * with {@code subquery_t} type or a non-null
5831     * {@link TExpression#getSubQuery()}. Mirrors the slice 9/10 pattern.
5832     */
5833    private static boolean containsAnySubquery(gudusoft.gsqlparser.nodes.TParseTreeNode root) {
5834        final boolean[] found = {false};
5835        root.acceptChildren(new TParseTreeVisitor() {
5836            @Override
5837            public void preVisit(TExpression e) {
5838                if (found[0]) return;
5839                if (e.getExpressionType() == EExpressionType.subquery_t
5840                        || e.getSubQuery() != null) {
5841                    found[0] = true;
5842                }
5843            }
5844        });
5845        return found[0];
5846    }
5847
5848    /** Same as {@link #containsAnySubquery} but also checks the root expression itself. */
5849    private static boolean containsAnySubqueryExpression(TExpression root) {
5850        if (root.getExpressionType() == EExpressionType.subquery_t
5851                || root.getSubQuery() != null) {
5852            return true;
5853        }
5854        return containsAnySubquery(root);
5855    }
5856
5857    /**
5858     * Slice 119 — collect every {@code subquery_t} TExpression node that
5859     * is a direct (top-level) subquery in the compound expression {@code
5860     * root}, without descending into any found subquery's own body.
5861     *
5862     * <p>Uses {@link TExpression#acceptChildren} with a depth counter to
5863     * handle all expression subtypes (arithmetic, CASE, function args)
5864     * without manual branch enumeration. The depth counter increments on
5865     * every {@code subquery_t} preVisit and decrements on every postVisit,
5866     * so nested subqueries inside a found body (e.g. an EXISTS inside a
5867     * scalar's WHERE) are tracked but NOT added to the result set.
5868     *
5869     * <p>Result list is in traversal (preVisit) order for deterministic
5870     * statement-graph numbering across identical SQL texts.
5871     */
5872    private static List<TExpression> collectNestedSubqueryExpressions(TExpression root) {
5873        if (root == null) return Collections.<TExpression>emptyList();
5874        // Defensive: root itself is a subquery_t — callers of the
5875        // mixed-expression path should have already taken the slice-115
5876        // top-level branch, but guard anyway.
5877        if (root.getExpressionType() == EExpressionType.subquery_t) {
5878            return Collections.singletonList(root);
5879        }
5880        final List<TExpression> ordered = new ArrayList<>();
5881        final int[] depth = {0};
5882        root.acceptChildren(new TParseTreeVisitor() {
5883            @Override
5884            public void preVisit(TExpression e) {
5885                if (e.getExpressionType() == EExpressionType.subquery_t) {
5886                    if (depth[0] == 0) ordered.add(e); // top-level only
5887                    depth[0]++;
5888                }
5889            }
5890            @Override
5891            public void postVisit(TExpression e) {
5892                if (e.getExpressionType() == EExpressionType.subquery_t) {
5893                    depth[0]--;
5894                }
5895            }
5896        });
5897        return ordered;
5898    }
5899
5900    /**
5901     * Slice-14 enclosing-scope map for correlation promotion. Holds the
5902     * full outer {@link RelationSource} per alias visible from the
5903     * enclosing scope so {@link #promoteCorrelatedRefsToOuterReference}
5904     * can read the outer's binding kind and qualifiedName.
5905     *
5906     * <p>The map is keyed by lower-cased alias for case-insensitive
5907     * lookup. The synthesised OUTER_REFERENCE RelationSource's alias is
5908     * NOT taken from this map's value — it is taken from the inner
5909     * ref's spelling (see §4.3 of the slice-14 plan), because
5910     * {@link gudusoft.gsqlparser.ir.semantic.binding.Resolver2NameBindingProvider}
5911     * records the inner ref's alias verbatim and case-sensitive
5912     * alias-equality elsewhere relies on that spelling.
5913     *
5914     * <p>Slice-15 adds {@link #subqueryAliasToIndex}: when a SUBQUERY-bound
5915     * outer alias is referenced from an inner correlated scalar, the
5916     * inner statement's lineage emission needs to look up the outer's
5917     * FROM-subquery body by alias to wire a STATEMENT_OUTPUT →
5918     * STATEMENT_OUTPUT edge. The map mirrors the outer's alias→index
5919     * map but is filtered to alias keys also present in
5920     * {@link #aliasLowerToOuter} as SUBQUERY entries so the two maps
5921     * cannot drift apart.
5922     *
5923     * <p>Slice-20 generalises slice-14/15's flat map into a chain of
5924     * ancestor scopes (the {@link #parent} field). Lookups walk innermost
5925     * → outermost via {@link #lookupAlias(String)}. The
5926     * {@link #flattenSubqueryAliasToIndex()} helper produces an innermost-
5927     * wins flattened map for {@code emitLineageForStatement} consumers
5928     * that resolve OUTER_REFERENCE-of-SUBQUERY through ancestor
5929     * FROM-subquery aliases. Worst-case asymptotic per build for a
5930     * scalar chain of depth D with K siblings per level is
5931     * {@code O(K · D²)}; in practice D ≤ 4-5 for human-written SQL so the
5932     * constant factor is negligible. If a real-world benchmark surfaces
5933     * the flatten as a hot path, the obvious fix is a memo keyed on
5934     * {@code EnclosingScope} identity — deferred until measured.
5935     */
5936    private static final class EnclosingScope {
5937        final Map<String, RelationSource> aliasLowerToOuter;
5938        final Map<String, Integer> subqueryAliasToIndex;
5939        /** Slice-20 chain: parent enclosing scope; null at the root. */
5940        final EnclosingScope parent;
5941
5942        EnclosingScope(Map<String, RelationSource> aliasLowerToOuter,
5943                       Map<String, Integer> subqueryAliasToIndex,
5944                       EnclosingScope parent) {
5945            this.aliasLowerToOuter = aliasLowerToOuter;
5946            this.subqueryAliasToIndex = subqueryAliasToIndex;
5947            this.parent = parent;
5948        }
5949
5950        static EnclosingScope empty() {
5951            return new EnclosingScope(
5952                    Collections.<String, RelationSource>emptyMap(),
5953                    Collections.<String, Integer>emptyMap(),
5954                    /*parent=*/ null);
5955        }
5956
5957        /**
5958         * Walk the chain innermost → outermost; first match wins
5959         * (shadowing). Defensive cycle guard via identity-keyed visited
5960         * set: the chain is a DAG by construction (parent links never
5961         * loop back), but the guard makes the invariant explicit.
5962         */
5963        RelationSource lookupAlias(String aliasLower) {
5964            EnclosingScope cur = this;
5965            Set<EnclosingScope> visited = Collections.newSetFromMap(
5966                    new IdentityHashMap<EnclosingScope, Boolean>());
5967            while (cur != null && visited.add(cur)) {
5968                RelationSource r = cur.aliasLowerToOuter.get(aliasLower);
5969                if (r != null) return r;
5970                cur = cur.parent;
5971            }
5972            return null;
5973        }
5974
5975        /**
5976         * Innermost-wins flatten of the SUBQUERY alias → body-index chain.
5977         * Ancestors contribute their own FROM-subquery alias maps; if both
5978         * a parent and a child define the same alias, the child wins
5979         * (innermost shadows outermost). Cycle guard mirrors
5980         * {@link #lookupAlias(String)}.
5981         */
5982        Map<String, Integer> flattenSubqueryAliasToIndex() {
5983            if (parent == null) return subqueryAliasToIndex;
5984            Deque<EnclosingScope> stack = new ArrayDeque<>();
5985            Set<EnclosingScope> visited = Collections.newSetFromMap(
5986                    new IdentityHashMap<EnclosingScope, Boolean>());
5987            EnclosingScope cur = this;
5988            while (cur != null && visited.add(cur)) {
5989                stack.push(cur);
5990                cur = cur.parent;
5991            }
5992            // Stack top = outermost. Pop outermost first, emit, then
5993            // innermost overwrites via put(). Result: innermost wins.
5994            Map<String, Integer> out = new LinkedHashMap<>();
5995            while (!stack.isEmpty()) {
5996                EnclosingScope s = stack.pop();
5997                out.putAll(s.subqueryAliasToIndex);
5998            }
5999            return out;
6000        }
6001    }
6002
6003    /**
6004     * Build an {@link EnclosingScope} for the consuming SELECT by walking
6005     * its {@link TSelectSqlStatement#tables} list (FROM relations) and
6006     * classifying each entry. Mirrors how
6007     * {@link Resolver2NameBindingProvider#bindRelation} would classify
6008     * the same TTable but produces a full RelationSource per alias so
6009     * the slice-14 promotion can read both kind and qualifiedName.
6010     *
6011     * <p>Slice 20: the {@code parent} parameter chains the new scope to an
6012     * enclosing one so a doubly-nested scalar body can resolve grandparent
6013     * aliases. Top-level callers pass {@code null}; recursive scalar
6014     * extraction passes the caller's enclosing scope.
6015     *
6016     * <p>Classification (precedence on collision: CTE name beats
6017     * base-table name):
6018     * <ul>
6019     *   <li>{@code TTable.getTableType() == subquery} AND alias matches
6020     *       a key in {@code subqueryAliasToIndex} → SUBQUERY-bound.</li>
6021     *   <li>{@code TTable.getName().toLowerCase()} matches a key in
6022     *       {@code cteNameToStatementIndex} → CTE-bound.</li>
6023     *   <li>Otherwise → TABLE-bound.</li>
6024     * </ul>
6025     */
6026    private static EnclosingScope buildEnclosingScope(TSelectSqlStatement consumer,
6027                                                      Map<String, Integer> cteNameToStatementIndex,
6028                                                      Map<String, Integer> subqueryAliasToIndex,
6029                                                      EnclosingScope parent) {
6030        if (consumer == null || consumer.tables == null || consumer.tables.size() == 0) {
6031            // Even an empty FROM contributes an empty scope so the chain's
6032            // shape reflects nesting depth uniformly.
6033            return new EnclosingScope(
6034                    Collections.<String, RelationSource>emptyMap(),
6035                    Collections.<String, Integer>emptyMap(),
6036                    parent);
6037        }
6038        Map<String, RelationSource> map = new LinkedHashMap<>();
6039        Map<String, Integer> filteredSubqueryAliasToIndex = new LinkedHashMap<>();
6040        for (int i = 0; i < consumer.tables.size(); i++) {
6041            TTable t = consumer.tables.getTable(i);
6042            if (t == null) continue;
6043            // Slice 74: route anonymous FROM-subqueries through
6044            // effectiveAliasOf so the synth name (instead of the
6045            // literal "subquery" returned by t.getName()) flows into the
6046            // OUTER_REFERENCE scope chain.
6047            String aliasOrName = effectiveAliasOf(t);
6048            if (aliasOrName == null || aliasOrName.isEmpty()) continue;
6049            String lower = aliasOrName.toLowerCase(Locale.ROOT);
6050            if (map.containsKey(lower)) continue; // first-occurrence wins (defensive)
6051
6052            if (t.getTableType() == gudusoft.gsqlparser.ETableSource.subquery) {
6053                if (subqueryAliasToIndex != null && subqueryAliasToIndex.containsKey(lower)) {
6054                    map.put(lower, new RelationSource(aliasOrName,
6055                            new RelationBinding(RelationKind.SUBQUERY, aliasOrName)));
6056                    // Slice 15: keep a parallel filtered map of alias→index
6057                    // so OUTER_REFERENCE-of-SUBQUERY emit-side dispatch can
6058                    // resolve the outer body's statement index.
6059                    filteredSubqueryAliasToIndex.put(lower, subqueryAliasToIndex.get(lower));
6060                }
6061                continue;
6062            }
6063            if (t.getTableType() != gudusoft.gsqlparser.ETableSource.objectname) {
6064                continue; // function / rowList / etc. — not modelled
6065            }
6066            String name = t.getName();
6067            if (name == null || name.isEmpty()) continue;
6068            String nameLower = name.toLowerCase(Locale.ROOT);
6069            if (cteNameToStatementIndex != null
6070                    && cteNameToStatementIndex.containsKey(nameLower)) {
6071                map.put(lower, new RelationSource(aliasOrName,
6072                        new RelationBinding(RelationKind.CTE, name)));
6073            } else {
6074                map.put(lower, new RelationSource(aliasOrName,
6075                        new RelationBinding(RelationKind.TABLE, name)));
6076            }
6077        }
6078        return new EnclosingScope(map, filteredSubqueryAliasToIndex, parent);
6079    }
6080
6081    /**
6082     * Slice 117 — sibling to {@link #buildEnclosingScope} for UPDATE
6083     * SET-RHS scalar-subquery bodies. The UPDATE outer scope is the
6084     * target table (TABLE-bound) plus any FROM-side relations walked
6085     * via {@code update.getJoins()} (TABLE / CTE / SUBQUERY-classified).
6086     *
6087     * <p>The target is added FIRST so on alias collisions with a
6088     * FROM-side relation (e.g. {@code UPDATE t ... FROM other t}) the
6089     * target wins. The {@code first-occurrence wins} rule mirrors
6090     * {@link #buildEnclosingScope}.
6091     *
6092     * <p>Used only by
6093     * {@link #extractScalarSubqueriesFromUpdateSetRhsInternal}.
6094     */
6095    private static EnclosingScope buildUpdateEnclosingScope(
6096            TUpdateSqlStatement update,
6097            Map<String, Integer> cteNameToStatementIndex,
6098            Map<String, Integer> subqueryAliasToIndex,
6099            EnclosingScope parent) {
6100        Map<String, RelationSource> map = new LinkedHashMap<>();
6101        Map<String, Integer> filteredSubqueryAliasToIndex = new LinkedHashMap<>();
6102        if (update == null) {
6103            return new EnclosingScope(map, filteredSubqueryAliasToIndex, parent);
6104        }
6105        // 1) Target — always TABLE-bound. effectiveAliasOf falls back to
6106        //    the table's own name when no alias is present.
6107        TTable target = update.getTargetTable();
6108        if (target != null
6109                && target.getTableType() == gudusoft.gsqlparser.ETableSource.objectname) {
6110            String targetAlias = effectiveAliasOf(target);
6111            String targetName = target.getName();
6112            if (targetAlias != null && !targetAlias.isEmpty()
6113                    && targetName != null && !targetName.isEmpty()) {
6114                String aliasLower = targetAlias.toLowerCase(Locale.ROOT);
6115                map.put(aliasLower, new RelationSource(targetAlias,
6116                        new RelationBinding(RelationKind.TABLE, targetName)));
6117            }
6118        }
6119        // 2) FROM-side joins.
6120        TJoinList joins = update.getJoins();
6121        if (joins != null) {
6122            for (TJoin join : joins) {
6123                addUpdateRelationToEnclosingScope(join.getTable(), map,
6124                        filteredSubqueryAliasToIndex,
6125                        cteNameToStatementIndex, subqueryAliasToIndex);
6126                TJoinItemList items = join.getJoinItems();
6127                if (items == null) continue;
6128                for (int i = 0; i < items.size(); i++) {
6129                    TJoinItem item = items.getJoinItem(i);
6130                    if (item == null) continue;
6131                    addUpdateRelationToEnclosingScope(item.getTable(), map,
6132                            filteredSubqueryAliasToIndex,
6133                            cteNameToStatementIndex, subqueryAliasToIndex);
6134                }
6135            }
6136        }
6137        return new EnclosingScope(map, filteredSubqueryAliasToIndex, parent);
6138    }
6139
6140    /**
6141     * Slice 117 — classify one FROM-side TTable for
6142     * {@link #buildUpdateEnclosingScope}. SUBQUERY-typed tables go to
6143     * {@link RelationKind#SUBQUERY} if their alias was registered in
6144     * {@code subqueryAliasToIndex} (slice-83 extraction map); objectname
6145     * tables go to {@link RelationKind#CTE} if their bare name is a
6146     * declared CTE, otherwise {@link RelationKind#TABLE}. First-
6147     * occurrence wins. Function / rowList sources are silently skipped
6148     * (not modelled).
6149     */
6150    private static void addUpdateRelationToEnclosingScope(TTable t,
6151            Map<String, RelationSource> map,
6152            Map<String, Integer> filteredSubqueryAliasToIndex,
6153            Map<String, Integer> cteNameToStatementIndex,
6154            Map<String, Integer> subqueryAliasToIndex) {
6155        if (t == null) return;
6156        String aliasOrName = effectiveAliasOf(t);
6157        if (aliasOrName == null || aliasOrName.isEmpty()) return;
6158        String lower = aliasOrName.toLowerCase(Locale.ROOT);
6159        if (map.containsKey(lower)) return; // first-occurrence wins
6160        if (t.getTableType() == gudusoft.gsqlparser.ETableSource.subquery) {
6161            if (subqueryAliasToIndex != null
6162                    && subqueryAliasToIndex.containsKey(lower)) {
6163                map.put(lower, new RelationSource(aliasOrName,
6164                        new RelationBinding(RelationKind.SUBQUERY, aliasOrName)));
6165                filteredSubqueryAliasToIndex.put(lower,
6166                        subqueryAliasToIndex.get(lower));
6167            }
6168            return;
6169        }
6170        if (t.getTableType() != gudusoft.gsqlparser.ETableSource.objectname) {
6171            return; // function / rowList / etc. — not modelled
6172        }
6173        String name = t.getName();
6174        if (name == null || name.isEmpty()) return;
6175        String nameLower = name.toLowerCase(Locale.ROOT);
6176        if (cteNameToStatementIndex != null
6177                && cteNameToStatementIndex.containsKey(nameLower)) {
6178            map.put(lower, new RelationSource(aliasOrName,
6179                    new RelationBinding(RelationKind.CTE, name)));
6180        } else {
6181            map.put(lower, new RelationSource(aliasOrName,
6182                    new RelationBinding(RelationKind.TABLE, name)));
6183        }
6184    }
6185
6186    /**
6187     * Slice 118 — sibling to {@link #buildUpdateEnclosingScope} for MERGE
6188     * per-WHEN action WHERE correlated predicate subqueries. Builds the
6189     * enclosing scope's relation map covering MERGE's target table, USING
6190     * source, and any outer CTEs declared on the MERGE itself. Used only
6191     * by {@link #collectMergeActionWhere}; produced once per MERGE in
6192     * {@link #buildMerge} and threaded through the predicate-subquery
6193     * extractor.
6194     *
6195     * <p>Classification mirrors {@code buildMerge} step 6:
6196     * <ul>
6197     *   <li>{@code merge.getTargetTable()} — TABLE-bound (qualifiedName =
6198     *       target's table name). First-occurrence wins.</li>
6199     *   <li>{@code merge.getUsingTable()}:
6200     *       <ul>
6201     *         <li>SUBQUERY-typed → SUBQUERY-bound with index pulled from
6202     *             {@code aliasToSubIdx}.</li>
6203     *         <li>objectname-typed AND name in
6204     *             {@code cteNameToStatementIndex} → CTE-bound (slice-101
6205     *             USING-as-CTE).</li>
6206     *         <li>Else objectname → TABLE-bound.</li>
6207     *       </ul></li>
6208     * </ul>
6209     */
6210    private static EnclosingScope buildMergeEnclosingScope(
6211            TMergeSqlStatement merge,
6212            Map<String, Integer> cteNameToStatementIndex,
6213            Map<String, Integer> aliasToSubIdx) {
6214        Map<String, RelationSource> map = new LinkedHashMap<>();
6215        Map<String, Integer> filteredSubqueryAliasToIndex = new LinkedHashMap<>();
6216        if (merge == null) {
6217            return new EnclosingScope(map, filteredSubqueryAliasToIndex,
6218                    /*parent=*/ null);
6219        }
6220        // 1) Target — always TABLE-bound. effectiveAliasOf falls back to
6221        //    the table's own name when no alias is present.
6222        TTable target = merge.getTargetTable();
6223        if (target != null
6224                && target.getTableType() == gudusoft.gsqlparser.ETableSource.objectname) {
6225            String targetAlias = effectiveAliasOf(target);
6226            String targetName = target.getName();
6227            if (targetAlias != null && !targetAlias.isEmpty()
6228                    && targetName != null && !targetName.isEmpty()) {
6229                String aliasLower = targetAlias.toLowerCase(Locale.ROOT);
6230                map.put(aliasLower, new RelationSource(targetAlias,
6231                        new RelationBinding(RelationKind.TABLE, targetName)));
6232            }
6233        }
6234        // 2) USING source.
6235        TTable using = merge.getUsingTable();
6236        if (using != null) {
6237            String usingAlias = effectiveAliasOf(using);
6238            // Fall back to the USING source's bare name when no alias is
6239            // present — matches buildMerge's `usingAlias` initialisation.
6240            if (usingAlias == null || usingAlias.isEmpty()) {
6241                usingAlias = (using.getName() == null
6242                        || using.getName().toString().isEmpty())
6243                        ? "__merge_using__"
6244                        : using.getName().toString();
6245            }
6246            String usingAliasLower = usingAlias.toLowerCase(Locale.ROOT);
6247            // First-occurrence wins (defensive — target's alias could
6248            // theoretically shadow if user named USING source identically;
6249            // matches buildUpdateEnclosingScope behaviour).
6250            if (!map.containsKey(usingAliasLower)) {
6251                if (using.getTableType()
6252                        == gudusoft.gsqlparser.ETableSource.subquery) {
6253                    if (aliasToSubIdx != null
6254                            && aliasToSubIdx.containsKey(usingAliasLower)) {
6255                        map.put(usingAliasLower, new RelationSource(usingAlias,
6256                                new RelationBinding(RelationKind.SUBQUERY,
6257                                        usingAlias)));
6258                        filteredSubqueryAliasToIndex.put(usingAliasLower,
6259                                aliasToSubIdx.get(usingAliasLower));
6260                    }
6261                } else if (using.getTableType()
6262                        == gudusoft.gsqlparser.ETableSource.objectname) {
6263                    String usingName = using.getName();
6264                    if (usingName != null && !usingName.isEmpty()) {
6265                        String usingNameLower =
6266                                usingName.toLowerCase(Locale.ROOT);
6267                        Integer cteIdx = (cteNameToStatementIndex == null)
6268                                ? null
6269                                : cteNameToStatementIndex.get(usingNameLower);
6270                        if (cteIdx != null) {
6271                            // Slice 101 USING-as-CTE branch — model as
6272                            // SUBQUERY-bound so the promoter classifies
6273                            // outerKind=SUBQUERY (mirrors the slice-117
6274                            // SUBQUERY classifier for UPDATE FROM-CTE
6275                            // sources, also via aliasToSubIdx).
6276                            map.put(usingAliasLower, new RelationSource(usingAlias,
6277                                    new RelationBinding(RelationKind.SUBQUERY,
6278                                            usingAlias)));
6279                            // The MERGE caller pre-populates aliasToSubIdx
6280                            // with the CTE's statement index for both
6281                            // usingAlias AND bare CTE name. Use whichever
6282                            // exists (aliasToSubIdx is keyed on lower-cased
6283                            // aliases — matches buildMerge step 6).
6284                            if (aliasToSubIdx != null
6285                                    && aliasToSubIdx.containsKey(usingAliasLower)) {
6286                                filteredSubqueryAliasToIndex.put(usingAliasLower,
6287                                        aliasToSubIdx.get(usingAliasLower));
6288                            }
6289                        } else {
6290                            map.put(usingAliasLower, new RelationSource(usingAlias,
6291                                    new RelationBinding(RelationKind.TABLE, usingName)));
6292                        }
6293                    }
6294                }
6295                // function / rowList sources are silently skipped (not
6296                // modelled — same convention as buildUpdateEnclosingScope).
6297            }
6298        }
6299        return new EnclosingScope(map, filteredSubqueryAliasToIndex,
6300                /*parent=*/ null);
6301    }
6302
6303    /**
6304     * Slice 117 — precompute the lowercased set of inner local FROM
6305     * aliases for a SELECT statement, used by the tolerant-outer-
6306     * binding fallback in
6307     * {@link gudusoft.gsqlparser.ir.semantic.binding.Resolver2NameBindingProvider#bindColumn}.
6308     * Walks {@code select.tables} (which includes both the FROM driver
6309     * and any JOIN sides) and collects each table's effective alias.
6310     *
6311     * <p>Computed BEFORE the inner build's {@code buildRelations} JOIN-
6312     * ON collector runs so the tolerant provider is already scope-aware
6313     * when the collector first calls {@code bindColumn} (codex round-5
6314     * ordering fix).
6315     */
6316    private static Set<String> precomputeInnerLocalAliases(
6317            TSelectSqlStatement select) {
6318        Set<String> aliases = new HashSet<>();
6319        if (select == null || select.tables == null) return aliases;
6320        for (int i = 0; i < select.tables.size(); i++) {
6321            TTable t = select.tables.getTable(i);
6322            if (t == null) continue;
6323            String alias = effectiveAliasOf(t);
6324            if (alias != null && !alias.isEmpty()) {
6325                aliases.add(alias.toLowerCase(Locale.ROOT));
6326            }
6327        }
6328        return aliases;
6329    }
6330
6331    /**
6332     * Slice-14 correlation promotion (slice-15 extended). Walk every
6333     * column ref in the already-built inner statement; any ref whose
6334     * alias is NOT in the inner's local relations is "correlated."
6335     * Look it up in the enclosing scope:
6336     *
6337     * <ul>
6338     *   <li>Found AND TABLE / CTE / SUBQUERY-bound → synthesise an
6339     *       OUTER_REFERENCE RelationSource with {@code outerKind}
6340     *       set to the resolved outer kind, and add to
6341     *       {@code inner.relations}.</li>
6342     *   <li>Found AND UNION / UNKNOWN-bound → throw defensively
6343     *       (no current builder path produces these from a FROM
6344     *       relation).</li>
6345     *   <li>Not found anywhere → throw (Resolver2 should not give us
6346     *       such a ref; defensive).</li>
6347     * </ul>
6348     *
6349     * <p>The synthesised RelationSource's alias is the inner ref's
6350     * spelling (case-sensitive equality is preserved). The
6351     * qualifiedName comes from the outer's existing binding:
6352     * - TABLE: outer's table name.
6353     * - CTE: outer's CTE name (NOT alias — see
6354     *   {@code correlatedToCteBoundOuterWithCteAlias}).
6355     * - SUBQUERY: outer's alias (matching slice-14 SUBQUERY
6356     *   convention, where {@code buildEnclosingScope} sets
6357     *   {@code qualifiedName=aliasOrName}).
6358     * No lower-casing happens here — that happens at projector
6359     * emit-time per slice-1 convention. Multiple inner refs with
6360     * case-variant spellings inherit the same pre-existing slice-1
6361     * limitation.
6362     */
6363    private static StatementGraph promoteCorrelatedRefsToOuterReference(
6364            StatementGraph innerStmt, String outerAlias,
6365            EnclosingScope enclosingScope) {
6366        Set<String> innerAliasesLower = new HashSet<>();
6367        for (RelationSource r : innerStmt.getRelations()) {
6368            innerAliasesLower.add(r.getAlias().toLowerCase(Locale.ROOT));
6369        }
6370        // Per alias-lower: outer's RelationSource (for binding) and the
6371        // inner ref's exact alias spelling (for the synthesised alias).
6372        LinkedHashMap<String, RelationSource> outerByLower = new LinkedHashMap<>();
6373        LinkedHashMap<String, String> firstRefSpellingByLower = new LinkedHashMap<>();
6374
6375        for (ColumnRef ref : collectAllInnerRefs(innerStmt)) {
6376            String aliasLower = ref.getRelationAlias().toLowerCase(Locale.ROOT);
6377            if (innerAliasesLower.contains(aliasLower)) continue; // local — not correlated
6378            if (outerByLower.containsKey(aliasLower)) continue;   // already promoted
6379
6380            // Slice 20: chain-walking lookup. Walks the EnclosingScope
6381            // chain innermost → outermost; first match wins (shadowing
6382            // semantics). slice 14/15 used a single-level get(); slice
6383            // 20 generalises so inner-inner scalars can resolve
6384            // grandparent (and deeper) aliases.
6385            RelationSource outerRel = enclosingScope.lookupAlias(aliasLower);
6386            if (outerRel == null) {
6387                throw new SemanticIRBuildException(
6388                        Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_UNKNOWN_RELATION_ALIAS,
6389                        "scalar subquery '" + outerAlias
6390                                + "' references unknown alias '" + ref.getRelationAlias()
6391                                + "' (column '" + ref.getRelationAlias() + "."
6392                                + ref.getColumnName()
6393                                + "'); not in inner relations or any enclosing scope", (TParseTreeNode) null));
6394            }
6395            RelationKind outerKind = outerRel.getBinding().getKind();
6396            if (outerKind != RelationKind.TABLE
6397                    && outerKind != RelationKind.CTE
6398                    && outerKind != RelationKind.SUBQUERY) {
6399                throw new SemanticIRBuildException(
6400                        Diagnostic.error(DiagnosticCode.CORRELATED_SCALAR_SUBQUERY_UNKNOWN_OUTER_BINDING,
6401                        "correlated scalar subquery '" + outerAlias
6402                                + "' references outer alias '" + ref.getRelationAlias()
6403                                + "' bound to a " + outerKind
6404                                + "; only TABLE / CTE / SUBQUERY-bound outer correlations supported", (TParseTreeNode) null));
6405            }
6406            outerByLower.put(aliasLower, outerRel);
6407            firstRefSpellingByLower.put(aliasLower, ref.getRelationAlias());
6408        }
6409
6410        if (outerByLower.isEmpty()) return innerStmt;
6411
6412        List<RelationSource> augmented = new ArrayList<>(innerStmt.getRelations());
6413        for (String key : outerByLower.keySet()) {
6414            RelationSource outerRel = outerByLower.get(key);
6415            augmented.add(new RelationSource(
6416                    firstRefSpellingByLower.get(key),
6417                    new RelationBinding(RelationKind.OUTER_REFERENCE,
6418                            outerRel.getBinding().getQualifiedName(),
6419                            outerRel.getBinding().getKind())));
6420        }
6421        return rebuildStatementGraphWithRelations(innerStmt, augmented);
6422    }
6423
6424    /**
6425     * Collect every {@link ColumnRef} reachable from {@code innerStmt}'s
6426     * seven clause-bearing fields. Used by
6427     * {@link #promoteCorrelatedRefsToOuterReference} (slice-11 scalar
6428     * bodies) and the JOIN-ON EXISTS predicate-body correlation walker
6429     * (slice 23+); mirrors the old rejecter's clause coverage exactly
6430     * (output sources, filter, join, groupBy, having, orderBy, and
6431     * slice-73 distinctOn).
6432     */
6433    private static List<ColumnRef> collectAllInnerRefs(StatementGraph innerStmt) {
6434        List<ColumnRef> all = new ArrayList<>();
6435        for (OutputColumn out : innerStmt.getOutputColumns()) {
6436            all.addAll(out.getSources());
6437        }
6438        all.addAll(innerStmt.getFilterColumnRefs());
6439        all.addAll(innerStmt.getJoinColumnRefs());
6440        all.addAll(innerStmt.getGroupByColumnRefs());
6441        all.addAll(innerStmt.getHavingColumnRefs());
6442        all.addAll(innerStmt.getOrderByColumnRefs());
6443        all.addAll(innerStmt.getDistinctOnColumnRefs());
6444        return all;
6445    }
6446
6447    /**
6448     * Copy a {@link StatementGraph} replacing only its relations list.
6449     * StatementGraph is otherwise immutable.
6450     */
6451    private static StatementGraph rebuildStatementGraphWithRelations(
6452            StatementGraph stmt, List<RelationSource> relations) {
6453        return new StatementGraph(
6454                stmt.getName(),
6455                stmt.getKind(),
6456                relations,
6457                stmt.getOutputColumns(),
6458                stmt.getFilterColumnRefs(),
6459                stmt.getJoinColumnRefs(),
6460                stmt.getGroupByColumnRefs(),
6461                stmt.getHavingColumnRefs(),
6462                stmt.getOrderByColumnRefs(),
6463                stmt.getDistinctOnColumnRefs(),
6464                stmt.isDistinct(),
6465                stmt.getSetOperator(),
6466                stmt.getRowLimit());
6467    }
6468
6469    /**
6470     * Slice 16 preflight: reject any FROM-clause subquery directly on a
6471     * set-op branch's FROM/JOIN list, BEFORE
6472     * {@link #extractScalarSubqueriesAsStatements} can append scalar-body
6473     * statements to {@code stmts}/{@code lineage}. Without this preflight,
6474     * a branch with both a FROM-subquery and a scalar projection would
6475     * extract the scalar (mutating shared state) and then fail later in
6476     * {@code buildSelectStatement}, producing a confusing scalar-correlation
6477     * error message instead of the slice-12 FROM-subquery boundary.
6478     *
6479     * <p>Inspects only the branch's DIRECT FROM/JOIN entries — does NOT
6480     * recurse into result-column expressions or scalar-body inner SELECTs
6481     * (those are handled by the recursive scalar-body build's own
6482     * {@code allowFromSubqueries=false} guard). The error message contains
6483     * both {@code set-op branch} and {@code FROM-clause subquery} keywords
6484     * so the slice-12 boundary is surfaced explicitly.
6485     */
6486    private static void rejectFromSubqueriesInSetOpBranch(TSelectSqlStatement br) {
6487        if (br.joins == null) return;
6488        for (TJoin join : br.joins) {
6489            TTable left = join.getTable();
6490            if (left != null
6491                    && left.getTableType() == gudusoft.gsqlparser.ETableSource.subquery) {
6492                throw new SemanticIRBuildException(
6493                        Diagnostic.error(DiagnosticCode.FROM_SUBQUERY_IN_SET_OP_BRANCH_FROM,
6494                        "FROM-clause subquery directly in a set-op branch FROM is not "
6495                                + "supported yet (set-op branch with FROM-clause subquery)", (TParseTreeNode) null));
6496            }
6497            TJoinItemList items = join.getJoinItems();
6498            if (items == null) continue;
6499            for (int i = 0; i < items.size(); i++) {
6500                TTable r = items.getJoinItem(i).getTable();
6501                if (r != null
6502                        && r.getTableType() == gudusoft.gsqlparser.ETableSource.subquery) {
6503                    throw new SemanticIRBuildException(
6504                            Diagnostic.error(DiagnosticCode.FROM_SUBQUERY_ON_JOIN_SIDE_IN_SET_OP_BRANCH,
6505                            "FROM-clause subquery on a JOIN side in a set-op branch is "
6506                                    + "not supported yet (set-op branch with FROM-clause subquery)", (TParseTreeNode) null));
6507                }
6508            }
6509        }
6510    }
6511
6512    // ====================================================================
6513    // Slice 12: Set operations (UNION / INTERSECT / MINUS / EXCEPT).
6514    //
6515    // Algorithm: each branch of the set-op tree is built as its own
6516    // synthetically-named StatementGraph; the outer set-op statement has
6517    // empty relations and per-output `STATEMENT_OUTPUT → STATEMENT_OUTPUT`
6518    // lineage edges to each branch's corresponding output. The flatten
6519    // walks the left-leaning AST iteratively (CLAUDE.md mandates no
6520    // recursion on leftStmt/rightStmt; would StackOverflow on 2000+ UNIONs).
6521    //
6522    // Internal-node modifiers (ORDER BY / row-limits) are rejected on every
6523    // set-op node visited (root + internal), not just the root, because
6524    // parenthesized inner combined nodes can carry those modifiers (per
6525    // /tmp/SetOpInnerModifierProbe: `(A UNION B ORDER BY id) UNION C`
6526    // attaches ORDER BY to the inner node in Oracle / PostgreSQL / MSSQL;
6527    // PostgreSQL maps inner FETCH FIRST → LIMIT).
6528    // ====================================================================
6529
6530    /**
6531     * Build a set-op program: flatten branches, build each as its own
6532     * StatementGraph, construct the outer set-op statement, emit lineage.
6533     * Returns the outer set-op statement's index in {@code stmts}.
6534     *
6535     * @param setOp the {@link TSelectSqlStatement} carrying
6536     *     {@code setOperatorType != none}.
6537     * @param setOpName non-null when the set-op is a CTE body (the outer
6538     *     statement is named with the CTE name); null when the set-op is
6539     *     the program's top-level outer.
6540     * @param hasOuterCteListAlreadyProcessed true when the caller already
6541     *     processed the set-op root's CTE list (top-level dispatch);
6542     *     false when this is a recursive context (set-op CTE body) where
6543     *     a non-empty CTE list on the set-op root is rejected as a
6544     *     nested WITH.
6545     */
6546    private static int buildSetOpProgram(TSelectSqlStatement setOp,
6547                                         NameBindingProvider provider,
6548                                         List<StatementGraph> stmts,
6549                                         List<LineageEdge> lineage,
6550                                         Map<String, Integer> cteNameToStatementIndex,
6551                                         String setOpName,
6552                                         boolean hasOuterCteListAlreadyProcessed) {
6553        // Nested-WITH guard (rd-5 MUST 1): when called from a CTE body
6554        // context, the set-op root must not carry its own CTE list.
6555        if (!hasOuterCteListAlreadyProcessed
6556                && setOp.getCteList() != null
6557                && setOp.getCteList().size() > 0) {
6558            throw new SemanticIRBuildException(
6559                    Diagnostic.error(DiagnosticCode.NESTED_WITH_NOT_SUPPORTED,
6560                    "nested WITH/CTE inside a CTE body or subquery is not supported yet "
6561                            + "(set-op CTE body has its own CTE list)", (TParseTreeNode) null));
6562        }
6563        // Slice 21: ORDER BY is now collected from the outer set-op
6564        // (see buildSetOpOuterOrderByColumnRefs).
6565        // Slice 72: outer row-limit lifted via buildSetOpRowLimit.
6566        // The internal-node reject (parenthesized inner set-ops with
6567        // row-limit) fires inside flattenSetOpTreeIteratively. Compute
6568        // BEFORE the snapshot block so a defensive throw (Hive/Vertica/
6569        // ANSI-DB2 guards, MSSQL null-valued TOrderBy slots) propagates
6570        // without leaving stmts/lineage partially populated.
6571        RowLimit setOpRowLimit = buildSetOpRowLimit(setOp);
6572
6573        // Slice 16: SET-OP-WIDE TRANSACTIONAL ROLLBACK (codex rounds 3-5
6574        // adversarial findings). Snapshot `stmts.size()` and
6575        // `lineage.size()` BEFORE any branch mutation. On any
6576        // SemanticIRBuildException thrown by the per-branch loop or
6577        // post-build validation, truncate both lists back to the snapshot
6578        // and rethrow. This addresses the full class of "mutation-free
6579        // branch validation that fires after earlier-branch mutations":
6580        // FROM-subquery preflight (round 3), column-count check (round 4),
6581        // multi-scalar-projection per-branch validation (round 5), branch
6582        // duplicate-output-names check (round 5), and any future check
6583        // that may join the same class. The pre-loop preflight below
6584        // remains for fast-fail with better error messages on common
6585        // shapes, but the rollback is the safety net.
6586        int stmtsSnapshot = stmts.size();
6587        int lineageSnapshot = lineage.size();
6588        try {
6589            return buildSetOpProgramInternal(setOp, provider, stmts, lineage,
6590                    cteNameToStatementIndex, setOpName,
6591                    hasOuterCteListAlreadyProcessed, setOpRowLimit);
6592        } catch (RuntimeException e) {
6593            while (stmts.size() > stmtsSnapshot) stmts.remove(stmts.size() - 1);
6594            while (lineage.size() > lineageSnapshot) lineage.remove(lineage.size() - 1);
6595            throw e;
6596        }
6597    }
6598
6599    /**
6600     * Internal body of {@link #buildSetOpProgram}. Wrapped with
6601     * snapshot/rollback by the public entry point; do not call directly.
6602     */
6603    private static int buildSetOpProgramInternal(TSelectSqlStatement setOp,
6604                                                  NameBindingProvider provider,
6605                                                  List<StatementGraph> stmts,
6606                                                  List<LineageEdge> lineage,
6607                                                  Map<String, Integer> cteNameToStatementIndex,
6608                                                  String setOpName,
6609                                                  boolean hasOuterCteListAlreadyProcessed,
6610                                                  RowLimit setOpRowLimit) {
6611
6612        SetOperator setOpKind = resolveSetOperator(setOp);
6613        List<TSelectSqlStatement> branches = flattenSetOpTreeIteratively(setOp, setOpKind);
6614        if (branches.size() < 2) {
6615            throw new SemanticIRBuildException(
6616                    Diagnostic.error(DiagnosticCode.SET_OP_BRANCH_COUNT_TOO_FEW,
6617                    "set-op flatten produced " + branches.size()
6618                            + " branches; expected at least 2", (TParseTreeNode) null));
6619        }
6620
6621        // Slice 16: PRE-LOOP PREFLIGHT (codex round-3 + round-4 adversarial
6622        // findings — medium). Run all mutation-free branch validation across
6623        // EVERY branch BEFORE the main build loop runs. Without this, a
6624        // scalar-bearing earlier branch can append scalar-body statements to
6625        // `stmts`/`lineage` BEFORE a later branch's rejection fires, leaving
6626        // partial state on the rejection path. The slice-16 safety claim is
6627        // "no half-built scalar bodies leak when a branch is rejected"; that
6628        // claim only holds when ALL mutation-free branch checks are
6629        // set-op-wide.
6630        //
6631        // Checks bundled here (each is AST-only and side-effect-free):
6632        //   - Defensive nested-set-op leaf check (slice 12).
6633        //   - Direct branch FROM-subquery rejection (slice 16 round 3).
6634        //   - Result-column-count compatibility across branches (slice 12,
6635        //     moved here in slice 16 round 4 — uses AST
6636        //     getResultColumnList().size() since BUILT outputColumns.size()
6637        //     is unavailable pre-build; the post-loop check on BUILT
6638        //     outputs stays as a defensive backup if AST-vs-built ever
6639        //     diverges for a future shape).
6640        int expectedAstCols = -1;
6641        for (int i = 0; i < branches.size(); i++) {
6642            TSelectSqlStatement br = branches.get(i);
6643            if (br.getSetOperatorType() != null
6644                    && br.getSetOperatorType() != ESetOperatorType.none) {
6645                throw new SemanticIRBuildException(
6646                        Diagnostic.error(DiagnosticCode.SET_OP_BRANCH_IS_SET_OP,
6647                        "set-op branch is itself a set operation; nested set "
6648                                + "operations in branches are not supported yet", (TParseTreeNode) null));
6649            }
6650            rejectFromSubqueriesInSetOpBranch(br);
6651            int astCols = br.getResultColumnList() == null
6652                    ? 0
6653                    : br.getResultColumnList().size();
6654            if (i == 0) {
6655                expectedAstCols = astCols;
6656            } else if (astCols != expectedAstCols) {
6657                throw new SemanticIRBuildException(
6658                        Diagnostic.error(DiagnosticCode.SET_OP_BRANCH_COLUMN_COUNT_MISMATCH,
6659                        "set-op branch column-count mismatch: branch[0] has "
6660                                + expectedAstCols + " columns, branch[" + i + "] has "
6661                                + astCols, (TParseTreeNode) null));
6662            }
6663        }
6664
6665        int[] branchIdxs = new int[branches.size()];
6666        for (int i = 0; i < branches.size(); i++) {
6667            TSelectSqlStatement br = branches.get(i);
6668
6669            // Slice 16: per-branch enclosing scope. allowFromSubqueries
6670            // stays false at the branch level so subqueryAliasToIndex
6671            // is empty (no OUTER_REFERENCE-of-SUBQUERY in branches).
6672            // CTE-aware so a branch's scalar can correlate to a CTE
6673            // visible in scope (top-level set-ops have all outer CTEs
6674            // indexed; CTE-body set-ops have only prior visible CTEs
6675            // indexed — the current CTE is registered AFTER this method
6676            // returns, by design, mirroring the non-set-op CTE body path).
6677            EnclosingScope branchEnclosing = buildEnclosingScope(br,
6678                    cteNameToStatementIndex,
6679                    Collections.<String, Integer>emptyMap(),
6680                    /*parent=*/ null);
6681            // Slice 20: pass `false` so the slice-12 / slice-16 boundary
6682            // holds — branch scalar bodies must NOT host another scalar
6683            // projection. The branch's TOP-LEVEL scalar projection is
6684            // still allowed (slice 16); deeper recursion is not.
6685            Map<Integer, ScalarInfo> branchScalarMap =
6686                    extractScalarSubqueriesAsStatements(br, provider,
6687                            stmts, lineage, cteNameToStatementIndex, branchEnclosing,
6688                            /*allowRecursiveScalarSubqueryExtraction=*/ false);
6689
6690            // Slice 16: compute branchName AFTER scalar extraction so the
6691            // digit suffix in `<set_op_branch_<idx>>` matches the branch's
6692            // final position in `stmts`. Scalar bodies appended by
6693            // extractScalarSubqueriesAsStatements come BEFORE the branch
6694            // in `stmts`, so pre-extraction `stmts.size()` would be wrong
6695            // by (number of scalar bodies in this branch) — breaking the
6696            // slice-12 invariant that branch synthetic names round-trip
6697            // to their statement index.
6698            //
6699            // Slice 113 — predicate bodies extracted from the branch's
6700            // WHERE clause via {@link PredicateClauseContext#SET_OP_BRANCH_WHERE}
6701            // also land in {@code stmts} BEFORE the branch, INSIDE the
6702            // {@code buildSelectStatementImpl} call below. So the
6703            // pre-build {@code stmts.size()} can still understate the
6704            // branch's final position. The slice-12/16 invariant is
6705            // preserved by computing a tentative name pre-build (best
6706            // guess used by inner consumers that need a non-null name),
6707            // then rebuilding the StatementGraph with the corrected
6708            // name AFTER the build via {@link #withRenamedTo} if any
6709            // predicate body was extracted.
6710            int preBuildStmtsSize = stmts.size();
6711            String tentativeBranchName = SET_OP_BRANCH_PREFIX + preBuildStmtsSize + ">";
6712            StatementGraph branchStmt = buildSelectStatementImpl(br, provider, tentativeBranchName,
6713                    /*hasOuterCteListAlreadyProcessed=*/ false,
6714                    /*allowFromSubqueries=*/ false,
6715                    /*allowScalarProjectionSubqueries=*/ true,   // ← slice-16 lift
6716                    /*allowWindowProjection=*/ true,
6717                    // Slice 113 keeps JOIN-ON predicate subqueries rejected
6718                    // in set-op branches (slice 23 / 26 contract pinned by
6719                    // existsInSetOpBranchJoinOnStillRejected /
6720                    // lhsSubqueryInSetOpBranchRejected) — the lift is
6721                    // WHERE-only. The two flags are now independent.
6722                    /*allowJoinOnPredicateSubqueries=*/ false,
6723                    /*stmtsForExtraction=*/ stmts,               // ← slice-113
6724                    /*lineageForExtraction=*/ lineage,           // ← slice-113
6725                    /*cteMapForExtraction=*/ cteNameToStatementIndex, // ← slice-113
6726                    /*isPredicateBody=*/ false,
6727                    /*whereClauseContext=*/ PredicateClauseContext.SET_OP_BRANCH_WHERE,
6728                    /*allowWherePredicateSubqueries=*/ true);    // ← slice-113 lift
6729            int idx = stmts.size();
6730            if (idx != preBuildStmtsSize) {
6731                // Slice 113 — predicate bodies were appended during
6732                // the branch build. Rebuild the StatementGraph with
6733                // the corrected name so the slice-12/16 invariant
6734                // (digit suffix == final position) survives. The
6735                // rebuild copies all 15 fields; no LineageRef is
6736                // affected because they are idx-based, not name-based
6737                // (codex round-1 Q4 resolution).
6738                String finalBranchName = SET_OP_BRANCH_PREFIX + idx + ">";
6739                branchStmt = withRenamedTo(branchStmt, finalBranchName);
6740            }
6741            rejectDuplicateOutputNames(branchStmt, branchStmt.getName());
6742            branchIdxs[i] = idx;
6743            stmts.add(branchStmt);
6744            // Branch's own per-output, filter, and join lineage. Pass
6745            // the branch's scalar map so STATEMENT_OUTPUT →
6746            // STATEMENT_OUTPUT edges to scalar bodies are emitted.
6747            // subqueryAliasToIndex stays empty (allowFromSubqueries=false
6748            // for branches, so no FROM-subquery aliases exist at this scope).
6749            emitLineageForStatement(branchStmt, idx, lineage,
6750                    cteNameToStatementIndex,
6751                    Collections.<String, Integer>emptyMap(),
6752                    branchScalarMap);
6753        }
6754
6755        // Validate column-count alignment via BUILT statements.
6756        int expectedCols = stmts.get(branchIdxs[0]).getOutputColumns().size();
6757        for (int i = 1; i < branches.size(); i++) {
6758            int n = stmts.get(branchIdxs[i]).getOutputColumns().size();
6759            if (n != expectedCols) {
6760                throw new SemanticIRBuildException(
6761                        Diagnostic.error(DiagnosticCode.SET_OP_BRANCH_COLUMN_COUNT_MISMATCH,
6762                        "set-op branch column-count mismatch: branch[0] has "
6763                                + expectedCols + " columns, branch[" + i + "] has " + n, (TParseTreeNode) null));
6764            }
6765        }
6766
6767        // Build outer outputs from branch[0]'s built outputs.
6768        StatementGraph branch0 = stmts.get(branchIdxs[0]);
6769        List<OutputColumn> outerOutputs = new ArrayList<>(expectedCols);
6770        Set<String> seenOuter = new HashSet<>();
6771        for (int i = 0; i < expectedCols; i++) {
6772            OutputColumn b0 = branch0.getOutputColumns().get(i);
6773            String name = b0.getName();
6774            if (name == null || name.isEmpty()) {
6775                throw new SemanticIRBuildException(
6776                        Diagnostic.error(DiagnosticCode.SET_OP_BRANCH_OUTPUT_NAME_UNUSABLE,
6777                        "set-op output position " + i + " has no usable name in branch[0]; "
6778                                + "add an alias to the SELECT-list expression", (TParseTreeNode) null));
6779            }
6780            if (!seenOuter.add(name.toLowerCase(Locale.ROOT))) {
6781                throw new SemanticIRBuildException(
6782                        Diagnostic.error(DiagnosticCode.SET_OP_DUPLICATE_OUTER_OUTPUT_NAME,
6783                        "set-op outer output name '" + name + "' is duplicated "
6784                                + "(branch[0] has duplicate output names)", null));
6785            }
6786            outerOutputs.add(new OutputColumn(name,
6787                    /*derived=*/ true,
6788                    /*aggregate=*/ false,
6789                    /*sources=*/ Collections.<ColumnRef>emptyList(),
6790                    /*windowSpec=*/ null));
6791        }
6792
6793        // Slice 21: collect outer ORDER BY refs from branches' base sources.
6794        // A throw here unwinds via the slice-16 snapshot/rollback wrapper
6795        // in buildSetOpProgram(), so partially-built branches/scalar-bodies
6796        // do not leak into stmts/lineage on rejection.
6797        List<ColumnRef> outerOrderByRefs = buildSetOpOuterOrderByColumnRefs(
6798                setOp, outerOutputs, stmts, branchIdxs);
6799
6800        StatementGraph outer = new StatementGraph(setOpName, "SELECT",
6801                /*relations=*/ Collections.<RelationSource>emptyList(),
6802                outerOutputs,
6803                /*filterColumnRefs=*/ Collections.<ColumnRef>emptyList(),
6804                /*joinColumnRefs=*/  Collections.<ColumnRef>emptyList(),
6805                /*groupByColumnRefs=*/Collections.<ColumnRef>emptyList(),
6806                /*havingColumnRefs=*/ Collections.<ColumnRef>emptyList(),
6807                /*orderByColumnRefs=*/outerOrderByRefs,
6808                /*distinctOnColumnRefs=*/Collections.<ColumnRef>emptyList(),
6809                /*distinct=*/ false,
6810                /*setOperator=*/ setOpKind,
6811                /*rowLimit=*/ setOpRowLimit);
6812        int outerIdx = stmts.size();
6813        stmts.add(outer);
6814
6815        // Lineage: outer.outputs[i] → each branch.outputs[i] (in branch order).
6816        for (int i = 0; i < expectedCols; i++) {
6817            OutputColumn out = outer.getOutputColumns().get(i);
6818            for (int b = 0; b < branches.size(); b++) {
6819                StatementGraph branchStmt = stmts.get(branchIdxs[b]);
6820                String branchOutName = branchStmt.getOutputColumns().get(i).getName();
6821                lineage.add(new LineageEdge(
6822                        LineageRef.statementOutput(outerIdx, out.getName()),
6823                        LineageRef.statementOutput(branchIdxs[b], branchOutName)));
6824            }
6825        }
6826        return outerIdx;
6827    }
6828
6829    /**
6830     * Reject row-limit clauses on an INTERNAL (non-root) set-op node.
6831     * Slice 9 / 12 rationale: with ORDER BY they decide which rows
6832     * survive, so the canonical-model exclusion of ORDER BY is only
6833     * sound when no row-limit is present.
6834     *
6835     * <p>Slice 21 split this from {@code rejectSetOpInternalOrderBy}
6836     * because the OUTER set-op node lifts its ORDER BY (collected via
6837     * {@link #buildSetOpOuterOrderByColumnRefs}). Slice 72 narrows the
6838     * row-limit guard the same way: the OUTER set-op node now lifts
6839     * row-limit metadata (collected via {@link #buildSetOpRowLimit}),
6840     * while parenthesized inner combined operations carrying a
6841     * row-limit (e.g. {@code (A UNION B LIMIT 3) UNION C}) remain
6842     * rejected because the intermediate limit is destroyed by the
6843     * outer set operation.
6844     */
6845    private static void rejectSetOpRowLimit(TSelectSqlStatement node) {
6846        if (node.getLimitClause() != null) {
6847            throw new SemanticIRBuildException(
6848                    Diagnostic.error(DiagnosticCode.SET_OP_ROW_LIMIT_NOT_SUPPORTED,
6849                    "row-limit clause LIMIT on a non-root set-op node is not supported yet", (TParseTreeNode) null));
6850        }
6851        if (node.getTopClause() != null) {
6852            throw new SemanticIRBuildException(
6853                    Diagnostic.error(DiagnosticCode.SET_OP_ROW_LIMIT_NOT_SUPPORTED,
6854                    "row-limit clause TOP on a non-root set-op node is not supported yet", (TParseTreeNode) null));
6855        }
6856        if (node.getFetchFirstClause() != null) {
6857            throw new SemanticIRBuildException(
6858                    Diagnostic.error(DiagnosticCode.SET_OP_ROW_LIMIT_NOT_SUPPORTED,
6859                    "row-limit clause FETCH FIRST on a non-root set-op node is not supported yet", (TParseTreeNode) null));
6860        }
6861        if (node.getOffsetClause() != null) {
6862            throw new SemanticIRBuildException(
6863                    Diagnostic.error(DiagnosticCode.SET_OP_ROW_LIMIT_NOT_SUPPORTED,
6864                    "row-limit clause OFFSET on a non-root set-op node is not supported yet", (TParseTreeNode) null));
6865        }
6866    }
6867
6868    /**
6869     * Reject ORDER BY on an INTERNAL (non-root) set-op node. Slice 21
6870     * lifted ORDER BY on the OUTER (root) set-op, but parenthesized
6871     * inner combined nodes like
6872     * {@code (A UNION B ORDER BY id) UNION C} remain rejected: the
6873     * intermediate sort is destroyed by the outer set operation
6874     * (UNION does not preserve order), so the inner ORDER BY has no
6875     * observable effect. Lifting requires modelling intermediate sort
6876     * semantics — a future slice.
6877     */
6878    private static void rejectSetOpInternalOrderBy(TSelectSqlStatement node) {
6879        if (node.getOrderbyClause() != null) {
6880            throw new SemanticIRBuildException(
6881                    Diagnostic.error(DiagnosticCode.SET_OP_NON_ROOT_ORDER_BY_NOT_SUPPORTED,
6882                    "ORDER BY on a non-root set-op node is not supported yet "
6883                            + "(intermediate sort would be discarded by the outer set operation)", (TParseTreeNode) null));
6884        }
6885    }
6886
6887    /**
6888     * Collect physical column refs for the outer set-op's ORDER BY
6889     * clause. Slice 21 lifts the slice-12 rejection on set-op outer
6890     * ORDER BY using the slice-9 single-SELECT pattern, generalised:
6891     *
6892     * <ul>
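     *   <li>The clause itself is rejected for ORDER SIBLINGS BY and
     *       RESET WHEN, and each sort-key item passes the same shape
     *       rejections as {@link #buildOrderByColumnRefs} (ordinals,
     *       constants, scalar / predicate subqueries, window
     *       functions). In-clause OFFSET/FETCH is NOT rejected here
     *       since slice 72: those slots are consumed by the set-op
     *       outer row-limit lift ({@link #buildSetOpRowLimit}).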
6897     *   <li>Each {@link TObjectName} reference dispatches via a
6898     *       four-case fail-closed taxonomy: {@code column_alias} →
6899     *       lookup via {@code toString()}; unqualified {@code column}
6900     *       → lookup via {@code getColumnNameOnly()}; qualified
6901     *       {@code column} → reject (set-op outer scope is the
6902     *       unioned outputs, not branches' tables); other
6903     *       {@code dbObjectType} → reject as unsupported.
6904     *   <li>The lookup is positional against {@code outerOutputs} (=
6905     *       branch[0].outputs by slice-12 design) — NOT per-branch
6906     *       name search. Per-branch name search would mis-bind
6907     *       swapped-name branches and silently accept names present
6908     *       only in non-branch[0]. Slice-21 codex rounds 1-2 MUSTs.
6909     *   <li>Each branch contributes its
6910     *       {@code outputColumns[pos].sources} for the matched
6911     *       position. Branches with empty sources at the matched
6912     *       position (scalar / fully-derived projection) reject the
6913     *       sort key with a tuned message — silent omission would
6914     *       lose dependency information (slice-21 codex round 1
6915     *       MUST 5).
6916     *   <li>Each branch-local {@link ColumnRef} is normalised to its
6917     *       catalog name via {@link RelationSource#getBinding()}'s
6918     *       {@code qualifiedName}. The set-op outer's relations list
6919     *       is empty, so branch-local aliases are not resolvable in
6920     *       the owning statement; normalisation yields self-contained
6921     *       refs (slice-21 codex round 1 MUST 4 + round 2 MUST 1).
6922     *   <li>A per-item empty-refs guard rejects sort keys that
6923     *       contributed zero physical refs (e.g. {@code ORDER BY
6924     *       1+0}, {@code ORDER BY UPPER('x')}). Mirrors slice-9
6925     *       single-SELECT invariant. Operates on a per-item local
6926     *       set, so duplicate cross-item refs ({@code ORDER BY id,
6927     *       id}) survive global LinkedHashSet de-duplication
6928     *       (slice-21 codex round 4 MUST 1).
6929     * </ul>
6930     *
6931     * <p>Like slice-9 ORDER BY for single-SELECT, this list does NOT
6932     * contribute to the canonical model — it is presentation metadata
6933     * only. The dlineage XML probe ({@code /tmp/SetOpOrderByLimitProbe})
6934     * confirmed dlineage emits no parity edges for set-op outer
6935     * ORDER BY.
6936     */
6937    private static List<ColumnRef> buildSetOpOuterOrderByColumnRefs(
6938            TSelectSqlStatement setOp,
6939            List<OutputColumn> outerOutputs,
6940            List<StatementGraph> stmts,
6941            int[] branchIdxs) {
6942        TOrderBy orderBy = setOp.getOrderbyClause();
6943        if (orderBy == null) {
6944            return new ArrayList<>();
6945        }
6946        if (orderBy.isSiblings()) {
6947            throw new SemanticIRBuildException(
6948                    Diagnostic.error(DiagnosticCode.ORDER_SIBLINGS_BY_NOT_SUPPORTED,
6949                    "ORDER SIBLINGS BY is not supported yet "
6950                            + "(Oracle hierarchical ordering)", orderBy));
6951        }
6952        if (orderBy.getResetWhenCondition() != null) {
6953            throw new SemanticIRBuildException(
6954                    Diagnostic.error(DiagnosticCode.ORDER_BY_RESET_WHEN_NOT_SUPPORTED,
6955                    "ORDER BY ... RESET WHEN is not supported yet "
6956                            + "(Teradata window-style restart)", orderBy));
6957        }
6958        // Slice 72: TOrderBy in-clause OFFSET/FETCH slots are admitted
6959        // for MSSQL set-op outer via buildSetOpRowLimit's TOrderBy
6960        // fallback (the MSSQL parser routes set-op outer OFFSET/FETCH
6961        // EXCLUSIVELY onto TOrderBy, not duplicated onto the SELECT
6962        // node as in single-SELECT). Removing the previous defensive
6963        // throws here so the slice-72 admit shapes aren't false-
6964        // rejected. The unused codes
6965        // ORDER_BY_FETCH_FIRST_NOT_SUPPORTED and
6966        // ORDER_BY_OFFSET_NOT_SUPPORTED stay as documentation of a
6967        // known reject taxonomy.
6968        TOrderByItemList items = orderBy.getItems();
6969        if (items == null || items.size() == 0) {
6970            return new ArrayList<>();
6971        }
6972        LinkedHashSet<ColumnRef> refs = new LinkedHashSet<>();
6973        for (int i = 0; i < items.size(); i++) {
6974            TOrderByItem item = items.getOrderByItem(i);
6975            if (item == null) continue;
6976            TExpression sortKey = item.getSortKey();
6977            if (sortKey == null) continue;
6978            // Same shape rejections as slice-9 single-SELECT.
6979            rejectOrderByOrdinalOrConstant(sortKey);
6980            rejectOrderByScalarSubquery(sortKey);
6981            rejectOrderByWindowFunction(sortKey);
6982            // (NOT rejectOrderByAliasReference — alias refs are valid
6983            // at set-op outer scope; they ARE the branch-output names,
6984            // looked up positionally below.)
6985
6986            // Per-item local set so the empty-refs guard counts refs
6987            // FOUND for this sort key, not refs ADDED to the global
6988            // set after de-dup. Otherwise `ORDER BY id, id` would
6989            // false-reject the second item (slice-21 codex round 4 MUST 1).
6990            LinkedHashSet<ColumnRef> itemRefs = new LinkedHashSet<>();
6991            collectSetOpOuterRefsForSortKey(sortKey, outerOutputs,
6992                    stmts, branchIdxs, itemRefs);
6993            if (itemRefs.isEmpty()) {
6994                throw new SemanticIRBuildException(
6995                        Diagnostic.error(DiagnosticCode.SET_OP_OUTER_ORDER_BY_NO_PHYSICAL_COLUMN_REFS,
6996                        "ORDER BY sort key '" + sortKey
6997                                + "' has no physical column references at set-op "
6998                                + "outer (constant or non-column expressions are "
6999                                + "not supported yet)", sortKey));
7000            }
7001            refs.addAll(itemRefs);
7002        }
7003        return new ArrayList<>(refs);
7004    }
7005
7006    /**
7007     * Collect refs for one set-op outer ORDER BY sort key. Walks the
7008     * sort-key expression for {@link TObjectName} nodes and dispatches
7009     * each through {@link #processSetOpOrderByObjectName}. Includes a
7010     * top-level fast path for the common {@code ORDER BY x} case where
7011     * the entire sort key IS the {@link TObjectName}.
7012     *
7013     * <p>The visitor filters its dispatch to {@code column},
7014     * {@code column_alias}, and {@code unknown} dbObjectTypes — these
7015     * are the shapes that represent sort-key column references. Other
7016     * TObjectName nodes (function names, schema qualifications) are
7017     * part of the surrounding expression structure and skipped
7018     * silently. The {@code unknown} case is included so the four-case
7019     * fail-closed taxonomy in {@link #processSetOpOrderByObjectName}
7020     * rejects vendor-typed unknown qualified refs (e.g.
7021     * {@code foo.id + id}, slice-21 codex round 2 MUST 2).
7022     */
7023    private static void collectSetOpOuterRefsForSortKey(
7024            TExpression sortKey,
7025            final List<OutputColumn> outerOutputs,
7026            final List<StatementGraph> stmts,
7027            final int[] branchIdxs,
7028            final LinkedHashSet<ColumnRef> outRefs) {
7029        // Top-level fast path: the visitor's `acceptChildren` may not
7030        // visit the root TObjectName when the sort key is itself a
7031        // bare TObjectName. Mirrors slice-9 rejectOrderByAliasReference.
7032        if (sortKey.getExpressionType() == EExpressionType.simple_object_name_t) {
7033            TObjectName op = sortKey.getObjectOperand();
7034            if (op != null) {
7035                processSetOpOrderByObjectName(op, outerOutputs, stmts,
7036                        branchIdxs, outRefs);
7037                return;
7038            }
7039        }
7040        sortKey.acceptChildren(new TParseTreeVisitor() {
7041            @Override
7042            public void preVisit(TObjectName node) {
7043                EDbObjectType ot = node.getDbObjectType();
7044                // Skip non-column-like TObjectNames (function names,
7045                // schema/server qualifications). The four-case
7046                // fail-closed taxonomy still runs for column /
7047                // column_alias / unknown to handle the slice-21 codex
7048                // round 2 MUST 2 partial-accept case (e.g.
7049                // `foo.id + id` rejects via `foo.id`'s `unknown`
7050                // dbObjectType).
7051                if (ot != EDbObjectType.column
7052                        && ot != EDbObjectType.column_alias
7053                        && ot != EDbObjectType.unknown) {
7054                    return;
7055                }
7056                processSetOpOrderByObjectName(node, outerOutputs, stmts,
7057                        branchIdxs, outRefs);
7058            }
7059        });
7060    }
7061
7062    /**
7063     * Resolve one {@link TObjectName} sort-key reference at set-op
7064     * outer scope. Four-case fail-closed taxonomy (slice-21 codex
7065     * round 2 MUST 2): column_alias / unqualified column / qualified
7066     * column / other.
7067     */
7068    private static void processSetOpOrderByObjectName(
7069            TObjectName node,
7070            List<OutputColumn> outerOutputs,
7071            List<StatementGraph> stmts,
7072            int[] branchIdxs,
7073            LinkedHashSet<ColumnRef> outRefs) {
7074        EDbObjectType ot = node.getDbObjectType();
7075        String name;
7076        if (ot == EDbObjectType.column_alias) {
7077            // Aliases at set-op outer carry tableToken=alias-name (the
7078            // /tmp/SetOpQualifiedRefProbe finding); accept regardless.
7079            name = node.toString();
7080        } else if (ot == EDbObjectType.column) {
7081            if (node.getTableToken() != null) {
7082                throw new SemanticIRBuildException(
7083                        Diagnostic.error(DiagnosticCode.ORDER_BY_QUALIFIED_REFERENCE_NOT_SUPPORTED,
7084                        "qualified column reference '" + node
7085                                + "' in set-op outer ORDER BY not supported "
7086                                + "(scope is the unioned outputs, not branches' tables)", node));
7087            }
7088            name = node.getColumnNameOnly();
7089        } else {
7090            throw new SemanticIRBuildException(
7091                    Diagnostic.error(DiagnosticCode.ORDER_BY_OBJECT_REFERENCE_UNSUPPORTED,
7092                    "unsupported ORDER BY object reference '" + node
7093                            + "' (dbObjectType=" + ot + ") in set-op outer", node));
7094        }
7095        if (name == null || name.isEmpty() || "*".equals(name)) {
7096            throw new SemanticIRBuildException(
7097                    Diagnostic.error(DiagnosticCode.ORDER_BY_OBJECT_REFERENCE_NO_USABLE_NAME,
7098                    "ORDER BY object reference '" + node + "' has no usable name", node));
7099        }
7100        String key = name.toLowerCase(Locale.ROOT);
7101        int pos = -1;
7102        for (int i = 0; i < outerOutputs.size(); i++) {
7103            String outName = outerOutputs.get(i).getName();
7104            if (outName != null && outName.toLowerCase(Locale.ROOT).equals(key)) {
7105                pos = i;
7106                break;
7107            }
7108        }
7109        if (pos < 0) {
7110            throw new SemanticIRBuildException(
7111                    Diagnostic.error(DiagnosticCode.ORDER_BY_NAME_NOT_MATCHED_IN_SET_OP_OUTPUT,
7112                    "ORDER BY '" + name + "' does not match any set-op output "
7113                            + "column (set-op outer column names come from branch[0])", node));
7114        }
7115        for (int b = 0; b < branchIdxs.length; b++) {
7116            StatementGraph br = stmts.get(branchIdxs[b]);
7117            OutputColumn oc = br.getOutputColumns().get(pos);
7118            if (oc.getSources().isEmpty()) {
7119                throw new SemanticIRBuildException(
7120                        Diagnostic.error(DiagnosticCode.SET_OP_ORDER_BY_BRANCH_OUTPUT_NO_SOURCES,
7121                        "ORDER BY '" + name + "' references branch[" + b
7122                                + "] output '" + oc.getName()
7123                                + "' which has no physical sources "
7124                                + "(derived/scalar projection); cannot "
7125                                + "capture this dependency yet", node));
7126            }
7127            for (ColumnRef cr : oc.getSources()) {
7128                outRefs.add(normaliseSetOpBranchRef(cr, br));
7129            }
7130        }
7131    }
7132
7133    /**
7134     * Normalise a branch-local {@link ColumnRef} to a self-contained
7135     * ref using the underlying {@link RelationBinding#getQualifiedName()}.
7136     *
7137     * <p>Slice-21 invariant (codex round 2 MUST 1): the set-op outer's
7138     * {@code relations} list is empty. Branch-local aliases (like
7139     * {@code e} for {@code FROM employees e}) are not resolvable in
7140     * the outer statement, so {@code orderByColumnRefs} normalises to
7141     * the catalog name. Fail-closed if no matching RelationSource —
7142     * this would indicate corrupt branch lineage state.
7143     */
7144    private static ColumnRef normaliseSetOpBranchRef(ColumnRef cr,
7145                                                     StatementGraph branch) {
7146        String alias = cr.getRelationAlias();
7147        String aliasKey = alias.toLowerCase(Locale.ROOT);
7148        for (RelationSource rs : branch.getRelations()) {
7149            if (rs.getAlias().toLowerCase(Locale.ROOT).equals(aliasKey)) {
7150                return new ColumnRef(rs.getBinding().getQualifiedName(),
7151                        cr.getColumnName());
7152            }
7153        }
7154        throw new SemanticIRBuildException(
7155                Diagnostic.error(DiagnosticCode.BRANCH_COLUMN_REF_UNKNOWN_RELATION,
7156                "internal: branch ColumnRef relationAlias '" + alias
7157                        + "' does not match any RelationSource in the "
7158                        + "branch's relations list", null));
7159    }
7160
7161    /**
7162     * Reject duplicate output names within a single statement.
7163     * Lineage refs are keyed by {@code (statementIndex, outputName)}; two
7164     * outputs sharing a name silently merge their lineage chains.
7165     */
7166    private static void rejectDuplicateOutputNames(StatementGraph stmt, String label) {
7167        Set<String> seen = new HashSet<>();
7168        for (OutputColumn c : stmt.getOutputColumns()) {
7169            String name = c.getName();
7170            if (name == null || name.isEmpty()) continue;
7171            if (!seen.add(name.toLowerCase(Locale.ROOT))) {
7172                throw new SemanticIRBuildException(
7173                        Diagnostic.error(DiagnosticCode.SET_OP_BRANCH_DUPLICATE_OUTPUT_NAME,
7174                        "set-op branch '" + label + "' has duplicate output name '"
7175                                + name + "'; lineage refs are keyed by output name "
7176                                + "and would collide", null));
7177            }
7178        }
7179    }
7180
7181    /**
7182     * Map ({@link ESetOperatorType}, {@code isAll()}) to the IR
7183     * {@link SetOperator} enum. The exhaustive switch makes a future
7184     * {@code ESetOperatorType} value fail loudly at build time
7185     * (mirrors slice-8 {@code resolveDistinctFlag} pattern).
7186     */
7187    private static SetOperator resolveSetOperator(TSelectSqlStatement setOp) {
7188        ESetOperatorType type = setOp.getSetOperatorType();
7189        if (type == null) {
7190            throw new SemanticIRBuildException(
7191                    Diagnostic.error(DiagnosticCode.SET_OP_ROOT_TYPE_NULL,
7192                    "expected non-null set-op type on the set-op root", (TParseTreeNode) null));
7193        }
7194        boolean all = setOp.isAll();
7195        switch (type) {
7196            case union:     return all ? SetOperator.UNION_ALL     : SetOperator.UNION;
7197            case intersect: return all ? SetOperator.INTERSECT_ALL : SetOperator.INTERSECT;
7198            case minus:     return all ? SetOperator.MINUS_ALL     : SetOperator.MINUS;
7199            case except:    return all ? SetOperator.EXCEPT_ALL    : SetOperator.EXCEPT;
7200            case none:
7201                throw new SemanticIRBuildException(
7202                        Diagnostic.error(DiagnosticCode.SET_OP_ROOT_TYPE_NONE,
7203                        "expected non-none set operator type on the set-op root", (TParseTreeNode) null));
7204            default:
7205                throw new SemanticIRBuildException(
7206                        Diagnostic.error(DiagnosticCode.SET_OP_UNKNOWN_OPERATOR_TYPE,
7207                        "unknown set operator type: " + type, (TParseTreeNode) null));
7208        }
7209    }
7210
7211    /**
7212     * Iteratively flatten the left-leaning set-op tree into a list of
7213     * leaf SELECT statements (CLAUDE.md mandates no recursion on
7214     * {@code leftStmt}/{@code rightStmt}; would StackOverflow on 2000+
7215     * UNIONs).
7216     *
7217     * <p>On every internal set-op node visited:
7218     * <ol>
     *   <li>Reject row-limit modifiers ({@link #rejectSetOpRowLimit})
     *       only on INTERNAL (non-root) nodes. Slice 72 lifted the
     *       root's row-limit via {@link #buildSetOpRowLimit}. Slice 12 +
     *       {@code /tmp/SetOpInnerModifierProbe}: parenthesized inner
     *       combined nodes can carry row-limits in Oracle / PostgreSQL /
     *       MSSQL, and those remain rejected.</li>
7224     *   <li>Reject ORDER BY ({@link #rejectSetOpInternalOrderBy}) only
7225     *       on INTERNAL (non-root) nodes. Slice 21 lifted ORDER BY on
7226     *       the root via {@link #buildSetOpOuterOrderByColumnRefs}; an
7227     *       internal {@code (A UNION B ORDER BY id) UNION C} sort is
7228     *       still discarded by the outer set operation, so it has no
7229     *       observable effect and remains rejected.</li>
7230     *   <li>Reject mixed-operator and mixed-{@code ALL} chains by checking
7231     *       the resolved kind matches the root's kind.</li>
7232     *   <li>Hard-reject malformed AST (null left/right child).</li>
7233     * </ol>
7234     *
7235     * <p>Push order is right-then-left so leaves emerge in left-to-right
7236     * declaration order.
7237     */
7238    private static List<TSelectSqlStatement> flattenSetOpTreeIteratively(
7239            TSelectSqlStatement root, SetOperator expected) {
7240        List<TSelectSqlStatement> leaves = new ArrayList<>();
7241        Deque<TSelectSqlStatement> stack = new ArrayDeque<>();
7242        stack.push(root);
7243        while (!stack.isEmpty()) {
7244            TSelectSqlStatement cur = stack.pop();
7245            ESetOperatorType t = cur.getSetOperatorType();
7246            if (t != null && t != ESetOperatorType.none) {
7247                // Slice 21: ORDER BY guard fires only on INTERNAL nodes.
7248                // The root (`cur == root`) lifts ORDER BY; the collection
7249                // happens in buildSetOpOuterOrderByColumnRefs.
7250                // Slice 72: row-limit guard ALSO fires only on INTERNAL
7251                // nodes. The root lifts via buildSetOpRowLimit (called
7252                // by buildSetOpProgram before this method).
7253                if (cur != root) {
7254                    rejectSetOpRowLimit(cur);
7255                    rejectSetOpInternalOrderBy(cur);
7256                }
7257                SetOperator curKind = resolveSetOperator(cur);
7258                if (curKind != expected) {
7259                    throw new SemanticIRBuildException(
7260                            Diagnostic.error(DiagnosticCode.MIXED_SET_OPERATORS_NOT_SUPPORTED,
7261                            "mixed set operators in a single chain are not supported yet "
7262                                    + "(root=" + expected + ", inner=" + curKind + ")", (TParseTreeNode) null));
7263                }
7264                if (cur.getLeftStmt() == null || cur.getRightStmt() == null) {
7265                    throw new SemanticIRBuildException(
7266                            Diagnostic.error(DiagnosticCode.MALFORMED_SET_OP_AST,
7267                            "malformed set-op AST: null left/right child", (TParseTreeNode) null));
7268                }
7269                stack.push(cur.getRightStmt());
7270                stack.push(cur.getLeftStmt());
7271            } else {
7272                leaves.add(cur);
7273            }
7274        }
7275        return leaves;
7276    }
7277
7278    private static Set<String> collectCteNames(TCTEList cteList) {
7279        if (cteList == null || cteList.size() == 0) return Collections.emptySet();
7280        Set<String> names = new HashSet<>();
7281        for (int i = 0; i < cteList.size(); i++) {
7282            String name = cteList.getCTE(i).getTableName().toString();
7283            if (name != null && !name.isEmpty()) {
7284                names.add(name.toLowerCase(Locale.ROOT));
7285            }
7286        }
7287        return names;
7288    }
7289
7290    /**
7291     * Slice 107 — return the first CTE name shared between the outer-WITH
7292     * and inner-WITH CTE lists on an INSERT (case-insensitive, lowercase
7293     * via {@code toLowerCase(Locale.ROOT)} matching the slice-15/103
7294     * duplicate-name walker convention), or {@code null} if the name sets
7295     * are disjoint. Used by {@code buildInsert} to keep the
7296     * shared-name case rejecting (PG/Oracle/Snowflake nested-WITH
7297     * inner-shadows-outer semantics not yet supported) while admitting
7298     * the disjoint case via flat-merge.
7299     *
7300     * <p>Pathological edge case (codex round-2 diff-review Q3): if one of
7301     * the two lists also contains an INTRA-list duplicate AND that
7302     * duplicated name happens to also appear in the other list, this
7303     * helper short-circuits with
7304     * INSERT_MIXED_OUTER_AND_INNER_WITH_NOT_SUPPORTED and masks the
7305     * more precise same-scope DUPLICATE_CTE_NAME the slice-103 walker
7306     * would have emitted. Accepted limitation — the diagnostic still
7307     * tells the user the shape is unsupported, and both codes point at
7308     * the same offending name. A future slice can pre-walk each list
7309     * for intra-list duplicates before the boundary check if a
7310     * customer reports confusion.
7311     */
7312    private static String findFirstSharedCteName(TCTEList outer, TCTEList inner) {
7313        Set<String> outerNames = new HashSet<>();
7314        for (int i = 0; i < outer.size(); i++) {
7315            outerNames.add(outer.getCTE(i).getTableName().toString().toLowerCase(Locale.ROOT));
7316        }
7317        for (int i = 0; i < inner.size(); i++) {
7318            String name = inner.getCTE(i).getTableName().toString();
7319            if (outerNames.contains(name.toLowerCase(Locale.ROOT))) {
7320                return name;
7321            }
7322        }
7323        return null;
7324    }
7325
7326    /**
7327     * Reject the case where a CTE body references a sibling CTE declared
7328     * <i>after</i> it. SQL chain semantics only allow left-to-right
7329     * references, but the bind-by-name provider would happily classify a
7330     * forward-declared CTE name as a base {@code TABLE} (because it's not
7331     * yet in {@code visibleSoFar}). Catching it here turns the silent
7332     * mislabeling into a clear error.
7333     */
7334    private static void rejectForwardCteReferences(final TCTE cte,
7335                                                   final Set<String> allCteNames,
7336                                                   final Set<String> visibleSoFar) {
7337        TSelectSqlStatement body = cte.getSubquery();
7338        if (body == null) return;
7339        final String selfName = cte.getTableName().toString().toLowerCase(Locale.ROOT);
7340        final List<String> forwards = new ArrayList<>();
7341        body.acceptChildren(new TParseTreeVisitor() {
7342            @Override
7343            public void preVisit(TTable t) {
7344                String tname = bareName(t);
7345                if (tname == null) return;
7346                String lower = tname.toLowerCase(Locale.ROOT);
7347                if (allCteNames.contains(lower)
7348                        && !visibleSoFar.contains(lower)
7349                        && !lower.equals(selfName)) {
7350                    forwards.add(tname);
7351                }
7352            }
7353        });
7354        if (!forwards.isEmpty()) {
7355            throw new SemanticIRBuildException(
7356                    Diagnostic.error(DiagnosticCode.CTE_FORWARD_REFERENCE,
7357                    "CTE '" + cte.getTableName() + "' forward-references later CTE(s) "
7358                            + forwards + "; only left-to-right CTE chains are supported", cte));
7359        }
7360    }
7361
7362    private static String bareName(TTable t) {
7363        if (t == null) return null;
7364        if (t.getTableType() != gudusoft.gsqlparser.ETableSource.objectname) return null;
7365        return t.getName();
7366    }
7367
7368    /**
7369     * Reject {@code WITH RECURSIVE}. Slice 4 supports chained
7370     * (forward-referencing) CTEs; recursion is left for a later slice that
7371     * can model the fixpoint semantics.
7372     */
7373    private static void rejectRecursiveCtes(TCTEList cteList) {
7374        if (cteList == null) return;
7375        for (int i = 0; i < cteList.size(); i++) {
7376            TCTE cte = cteList.getCTE(i);
7377            if (cte.isRecursive()) {
7378                throw new SemanticIRBuildException(
7379                        Diagnostic.error(DiagnosticCode.CTE_WITH_RECURSIVE_NOT_SUPPORTED,
7380                        "WITH RECURSIVE is not supported yet (CTE: " + cte.getTableName() + ")", cte));
7381            }
7382        }
7383    }
7384
7385    /**
7386     * Slice 101 — walk the WITH clause on a MERGE statement and append
7387     * each CTE body to {@code stmts} as a preceding statement. Mirrors
7388     * the SELECT-side build() at lines ~516-653.
7389     *
7390     * <p>Returns a {@code cteNameToStatementIndex} map keyed by
7391     * lower-cased CTE name. {@code ctePublishedColumnsOut} is populated
7392     * with each CTE's output column names so the {@code buildMerge}
7393     * USING-as-CTE branch can install them via
7394     * {@link NameBindingProvider#withInScopeRelationColumns}.
7395     *
7396     * <p>Rejects (chronological):
7397     * <ol>
7398     *   <li>WITH RECURSIVE — reuses {@link DiagnosticCode#CTE_WITH_RECURSIVE_NOT_SUPPORTED}.
7399     *       Currently no admitting vendor (PG parser PARSE_FAILED, probe
7400     *       2026-05-17); defensive reject for forward compatibility.</li>
7401     *   <li>CTE with explicit column list — rejects with new
7402     *       {@link DiagnosticCode#MERGE_CTE_EXPLICIT_COLUMN_LIST_NOT_SUPPORTED}.
7403     *       PG and MSSQL parsers admit this shape; slice 101 defers
7404     *       because the inner CTE body output names ≠ user-visible CTE
7405     *       column names.</li>
7406     *   <li>Duplicate CTE name — reuses {@link DiagnosticCode#DUPLICATE_CTE_NAME}.</li>
7407     *   <li>Forward CTE reference — reuses {@link DiagnosticCode#CTE_FORWARD_REFERENCE}.</li>
7408     * </ol>
7409     *
7410     * <p>Set-op CTE bodies route through {@link #buildSetOpProgram};
7411     * non-set-op CTE bodies route through {@link #buildSelectStatement}.
7412     * Each CTE's published columns are added to {@code ctePublishedColumnsOut}
7413     * after its body is built so a CTE cannot self-reference (mirrors
7414     * SELECT-side slice 60).
7415     */
7416    private static Map<String, Integer> buildMergeCteList(
7417            TMergeSqlStatement merge,
7418            NameBindingProvider provider,
7419            List<StatementGraph> stmts,
7420            List<LineageEdge> lineage,
7421            Map<String, List<String>> ctePublishedColumnsOut) {
7422        TCTEList cteList = merge.getCteList();
7423        Map<String, Integer> cteNameToStatementIndex = new HashMap<>();
7424        if (cteList == null || cteList.size() == 0) {
7425            return cteNameToStatementIndex;
7426        }
7427        rejectRecursiveCtes(cteList);
7428        // Slice 102 — explicit-column-list shapes (PG/MSSQL `WITH cte(a, b) AS
7429        // (...) MERGE ...`) are admitted by rebuilding the body's
7430        // StatementGraph with the explicit-list names and rewriting outgoing
7431        // STATEMENT_OUTPUT lineage refs. The slice-101 upfront reject is
7432        // replaced by per-CTE rename application below. The slice-101 code
7433        // (MERGE_CTE_EXPLICIT_COLUMN_LIST_NOT_SUPPORTED) stays declared for
7434        // API stability.
7435        Set<String> allCteNames = collectCteNames(cteList);
7436        Set<String> visibleSoFar = new HashSet<>();
7437        for (int i = 0; i < cteList.size(); i++) {
7438            TCTE cte = cteList.getCTE(i);
7439            String cteName = cte.getTableName().toString();
7440            String cteNameLower = cteName.toLowerCase(Locale.ROOT);
7441            if (visibleSoFar.contains(cteNameLower)) {
7442                throw new SemanticIRBuildException(
7443                        Diagnostic.error(DiagnosticCode.DUPLICATE_CTE_NAME,
7444                        "duplicate CTE name '" + cteName
7445                                + "' in WITH clause; CTE names must be unique",
7446                        cte));
7447            }
7448            rejectForwardCteReferences(cte, allCteNames, visibleSoFar);
7449            NameBindingProvider bodyProvider =
7450                    provider.withCteContext(visibleSoFar);
7451            // Slice 102 — snapshot the lineage size BEFORE either branch so
7452            // the rename helper can rewrite outgoing STATEMENT_OUTPUT refs
7453            // in [lineageSize0, lineage.size()) without touching prior CTE
7454            // bodies' edges. Covers BOTH set-op and non-set-op branches
7455            // (codex round-1 plan-review BLOCKING).
7456            int lineageSize0 = lineage.size();
7457            int bodyIdx;
7458            TSelectSqlStatement cteBody = cte.getSubquery();
7459            if (cteBody != null
7460                    && cteBody.getSetOperatorType() != null
7461                    && cteBody.getSetOperatorType() != ESetOperatorType.none) {
7462                bodyIdx = buildSetOpProgram(cteBody, bodyProvider, stmts,
7463                        lineage, cteNameToStatementIndex, cteName,
7464                        /*hasOuterCteListAlreadyProcessed=*/ false);
7465                cteNameToStatementIndex.put(cteNameLower, bodyIdx);
7466            } else {
7467                int cteStmtsSize0 = stmts.size();
7468                int cteLineageSize0 = lineage.size();
7469                Map<String, Integer> cteSubqueryAliasToIndex;
7470                try {
7471                    cteSubqueryAliasToIndex =
7472                            extractFromSubqueriesAsStatements(cteBody,
7473                                    bodyProvider, stmts, lineage,
7474                                    cteNameToStatementIndex,
7475                                    ctePublishedColumnsOut);
7476                } catch (RuntimeException ex) {
7477                    while (stmts.size() > cteStmtsSize0) {
7478                        stmts.remove(stmts.size() - 1);
7479                    }
7480                    while (lineage.size() > cteLineageSize0) {
7481                        lineage.remove(lineage.size() - 1);
7482                    }
7483                    throw ex;
7484                }
7485                EnclosingScope cteEnclosing = buildEnclosingScope(cteBody,
7486                        cteNameToStatementIndex, cteSubqueryAliasToIndex,
7487                        /*parent=*/ null);
7488                Map<Integer, ScalarInfo> cteScalarMap =
7489                        extractScalarSubqueriesAsStatements(cteBody,
7490                                bodyProvider, stmts, lineage,
7491                                cteNameToStatementIndex, cteEnclosing,
7492                                /*allowRecursiveScalarSubqueryExtraction=*/ true);
7493                Map<String, List<String>> cteBodyInScope =
7494                        buildEffectiveAliasInScopeMap(cteBody, bodyProvider,
7495                                ctePublishedColumnsOut,
7496                                cteSubqueryAliasToIndex, stmts);
7497                NameBindingProvider cteBodyProviderWithStar = bodyProvider
7498                        .withInScopeRelationColumns(cteBodyInScope);
7499                // Slice 114 — switch to buildSelectStatementImpl with
7500                // snapshot/rollback (see the matching SELECT-side
7501                // CTE site for full rationale).
7502                int cteBodyStmtsSnapshot = stmts.size();
7503                int cteBodyLineageSnapshot = lineage.size();
7504                StatementGraph body;
7505                try {
7506                    body = buildSelectStatementImpl(cteBody,
7507                            cteBodyProviderWithStar, cteName,
7508                            /*hasOuterCteListAlreadyProcessed=*/ false,
7509                            /*allowFromSubqueries=*/ true,
7510                            /*allowScalarProjectionSubqueries=*/ true,
7511                            /*allowWindowProjection=*/ true,
7512                            /*allowJoinOnPredicateSubqueries=*/ false,
7513                            /*stmtsForExtraction=*/ stmts,
7514                            /*lineageForExtraction=*/ lineage,
7515                            /*cteMapForExtraction=*/ cteNameToStatementIndex,
7516                            /*isPredicateBody=*/ false,
7517                            /*whereClauseContext=*/ PredicateClauseContext.CTE_BODY_WHERE,
7518                            /*allowWherePredicateSubqueries=*/ true);
7519                } catch (RuntimeException ex) {
7520                    while (stmts.size() > cteBodyStmtsSnapshot) stmts.remove(stmts.size() - 1);
7521                    while (lineage.size() > cteBodyLineageSnapshot) lineage.remove(lineage.size() - 1);
7522                    throw ex;
7523                }
7524                bodyIdx = stmts.size();
7525                stmts.add(body);
7526                cteNameToStatementIndex.put(cteNameLower, bodyIdx);
7527                emitLineageForStatement(body, bodyIdx, lineage,
7528                        cteNameToStatementIndex, cteSubqueryAliasToIndex,
7529                        cteScalarMap);
7530            }
7531            // Slice 102 — apply explicit-column-list rename if present.
7532            // Rebuilds stmts[bodyIdx] with renamed OutputColumns and
7533            // rewrites STATEMENT_OUTPUT(bodyIdx, oldName) refs in
7534            // lineage[lineageSize0..) to use the renamed name. Returns the
7535            // published column list (renamed if explicit list applied,
7536            // else inner names from the body).
7537            List<String> publishedCols = applyExplicitCteColumnListRename(
7538                    cte, stmts, lineage, bodyIdx, lineageSize0, "MERGE");
7539            ctePublishedColumnsOut.put(cteNameLower, publishedCols);
7540            visibleSoFar.add(cteNameLower);
7541        }
7542        return cteNameToStatementIndex;
7543    }
7544
7545    /**
7546     * Slice 105 — walk the WITH clause on an UPDATE statement and append
7547     * each CTE body to {@code stmts} as a preceding statement. Mirrors
7548     * the slice-101 MERGE walker {@link #buildMergeCteList} verbatim
7549     * except for the source of the CTE list and the
7550     * {@link #applyExplicitCteColumnListRename} {@code dmlKind} argument.
7551     *
7552     * <p>Returns a {@code cteNameToStatementIndex} map keyed by
7553     * lower-cased CTE name. {@code ctePublishedColumnsOut} is populated
7554     * with each CTE's output column names so {@link #buildUpdateRelation}
7555     * + {@link #buildUpdateInScopeMap} can route FROM-side references to
7556     * the matching CTE as SUBQUERY-kind relations with the CTE's columns
7557     * published into the in-scope map.
7558     *
7559     * <p>The slice-103 SELECT-side CTE walker contract is reused via the
7560     * {@link #applyExplicitCteColumnListRename} helper with
7561     * {@code dmlKind="SELECT"} so the SELECT-side
7562     * {@link DiagnosticCode#CTE_EXPLICIT_COLUMN_LIST_ARITY_MISMATCH} code
7563     * fires on arity mismatch (codex round-1 Q2 confirmed YES — UPDATE is
7564     * closer to ordinary SELECT than to MERGE for CTE rename semantics).
7565     *
7566     * <p>Rejects (chronological):
7567     * <ol>
7568     *   <li>{@code WITH RECURSIVE} — {@link DiagnosticCode#CTE_WITH_RECURSIVE_NOT_SUPPORTED}.
7569     *       Currently no admitting vendor (Oracle PARSE_FAILED on outer-WITH-UPDATE).</li>
7570     *   <li>Duplicate CTE name — {@link DiagnosticCode#DUPLICATE_CTE_NAME}.</li>
7571     *   <li>Forward CTE reference — {@link DiagnosticCode#CTE_FORWARD_REFERENCE}.</li>
7572     *   <li>Explicit-column-list arity mismatch — handled by
7573     *       {@link #applyExplicitCteColumnListRename} via
7574     *       {@link DiagnosticCode#CTE_EXPLICIT_COLUMN_LIST_ARITY_MISMATCH}.</li>
7575     * </ol>
7576     */
7577    private static Map<String, Integer> buildUpdateCteList(
7578            TUpdateSqlStatement update,
7579            NameBindingProvider provider,
7580            List<StatementGraph> stmts,
7581            List<LineageEdge> lineage,
7582            Map<String, List<String>> ctePublishedColumnsOut) {
7583        TCTEList cteList = update.getCteList();
7584        Map<String, Integer> cteNameToStatementIndex = new HashMap<>();
7585        if (cteList == null || cteList.size() == 0) {
7586            return cteNameToStatementIndex;
7587        }
7588        rejectRecursiveCtes(cteList);
7589        Set<String> allCteNames = collectCteNames(cteList);
7590        Set<String> visibleSoFar = new HashSet<>();
7591        for (int i = 0; i < cteList.size(); i++) {
7592            TCTE cte = cteList.getCTE(i);
7593            String cteName = cte.getTableName().toString();
7594            String cteNameLower = cteName.toLowerCase(Locale.ROOT);
7595            if (visibleSoFar.contains(cteNameLower)) {
7596                throw new SemanticIRBuildException(
7597                        Diagnostic.error(DiagnosticCode.DUPLICATE_CTE_NAME,
7598                        "duplicate CTE name '" + cteName
7599                                + "' in WITH clause; CTE names must be unique",
7600                        cte));
7601            }
7602            rejectForwardCteReferences(cte, allCteNames, visibleSoFar);
7603            NameBindingProvider bodyProvider =
7604                    provider.withCteContext(visibleSoFar);
7605            int lineageSize0 = lineage.size();
7606            int bodyIdx;
7607            TSelectSqlStatement cteBody = cte.getSubquery();
7608            if (cteBody != null
7609                    && cteBody.getSetOperatorType() != null
7610                    && cteBody.getSetOperatorType() != ESetOperatorType.none) {
7611                bodyIdx = buildSetOpProgram(cteBody, bodyProvider, stmts,
7612                        lineage, cteNameToStatementIndex, cteName,
7613                        /*hasOuterCteListAlreadyProcessed=*/ false);
7614                cteNameToStatementIndex.put(cteNameLower, bodyIdx);
7615            } else {
7616                int cteStmtsSize0 = stmts.size();
7617                int cteLineageSize0 = lineage.size();
7618                Map<String, Integer> cteSubqueryAliasToIndex;
7619                try {
7620                    cteSubqueryAliasToIndex =
7621                            extractFromSubqueriesAsStatements(cteBody,
7622                                    bodyProvider, stmts, lineage,
7623                                    cteNameToStatementIndex,
7624                                    ctePublishedColumnsOut);
7625                } catch (RuntimeException ex) {
7626                    while (stmts.size() > cteStmtsSize0) {
7627                        stmts.remove(stmts.size() - 1);
7628                    }
7629                    while (lineage.size() > cteLineageSize0) {
7630                        lineage.remove(lineage.size() - 1);
7631                    }
7632                    throw ex;
7633                }
7634                EnclosingScope cteEnclosing = buildEnclosingScope(cteBody,
7635                        cteNameToStatementIndex, cteSubqueryAliasToIndex,
7636                        /*parent=*/ null);
7637                Map<Integer, ScalarInfo> cteScalarMap =
7638                        extractScalarSubqueriesAsStatements(cteBody,
7639                                bodyProvider, stmts, lineage,
7640                                cteNameToStatementIndex, cteEnclosing,
7641                                /*allowRecursiveScalarSubqueryExtraction=*/ true);
7642                Map<String, List<String>> cteBodyInScope =
7643                        buildEffectiveAliasInScopeMap(cteBody, bodyProvider,
7644                                ctePublishedColumnsOut,
7645                                cteSubqueryAliasToIndex, stmts);
7646                NameBindingProvider cteBodyProviderWithStar = bodyProvider
7647                        .withInScopeRelationColumns(cteBodyInScope);
7648                // Slice 114 — switch to buildSelectStatementImpl with
7649                // snapshot/rollback (see the matching SELECT-side
7650                // CTE site for full rationale).
7651                int cteBodyStmtsSnapshot = stmts.size();
7652                int cteBodyLineageSnapshot = lineage.size();
7653                StatementGraph body;
7654                try {
7655                    body = buildSelectStatementImpl(cteBody,
7656                            cteBodyProviderWithStar, cteName,
7657                            /*hasOuterCteListAlreadyProcessed=*/ false,
7658                            /*allowFromSubqueries=*/ true,
7659                            /*allowScalarProjectionSubqueries=*/ true,
7660                            /*allowWindowProjection=*/ true,
7661                            /*allowJoinOnPredicateSubqueries=*/ false,
7662                            /*stmtsForExtraction=*/ stmts,
7663                            /*lineageForExtraction=*/ lineage,
7664                            /*cteMapForExtraction=*/ cteNameToStatementIndex,
7665                            /*isPredicateBody=*/ false,
7666                            /*whereClauseContext=*/ PredicateClauseContext.CTE_BODY_WHERE,
7667                            /*allowWherePredicateSubqueries=*/ true);
7668                } catch (RuntimeException ex) {
7669                    while (stmts.size() > cteBodyStmtsSnapshot) stmts.remove(stmts.size() - 1);
7670                    while (lineage.size() > cteBodyLineageSnapshot) lineage.remove(lineage.size() - 1);
7671                    throw ex;
7672                }
7673                bodyIdx = stmts.size();
7674                stmts.add(body);
7675                cteNameToStatementIndex.put(cteNameLower, bodyIdx);
7676                emitLineageForStatement(body, bodyIdx, lineage,
7677                        cteNameToStatementIndex, cteSubqueryAliasToIndex,
7678                        cteScalarMap);
7679            }
7680            // Slice 105 — explicit column-list rename uses dmlKind="SELECT"
7681            // so the SELECT-side CTE_EXPLICIT_COLUMN_LIST_ARITY_MISMATCH
7682            // code fires (codex Q2 confirmed YES — UPDATE is closer to
7683            // ordinary SELECT than MERGE for CTE rename semantics).
7684            List<String> publishedCols = applyExplicitCteColumnListRename(
7685                    cte, stmts, lineage, bodyIdx, lineageSize0, "SELECT");
7686            ctePublishedColumnsOut.put(cteNameLower, publishedCols);
7687            visibleSoFar.add(cteNameLower);
7688        }
7689        return cteNameToStatementIndex;
7690    }
7691
7692    /**
7693     * Slice 106 — walk the WITH clause on a DELETE statement and append
7694     * each CTE body to {@code stmts} as a preceding statement. Mirrors
7695     * the slice-105 UPDATE walker {@link #buildUpdateCteList} verbatim
7696     * except for the source of the CTE list ({@code delete.getCteList()}).
7697     *
7698     * <p>Returns a {@code cteNameToStatementIndex} map keyed by
7699     * lower-cased CTE name. {@code ctePublishedColumnsOut} is populated
7700     * with each CTE's output column names so {@link #buildDeleteRelation}
7701     * + {@link #buildDeleteInScopeMap} can route FROM-side references to
7702     * the matching CTE as SUBQUERY-kind relations with the CTE's columns
7703     * published into the in-scope map.
7704     *
7705     * <p>The slice-103 SELECT-side CTE walker contract is reused via the
7706     * {@link #applyExplicitCteColumnListRename} helper with
7707     * {@code dmlKind="SELECT"} so the SELECT-side
7708     * {@link DiagnosticCode#CTE_EXPLICIT_COLUMN_LIST_ARITY_MISMATCH} code
7709     * fires on arity mismatch (slice-105 precedent: UPDATE/DELETE are
7710     * closer to ordinary SELECT than to MERGE for CTE rename semantics).
7711     *
7712     * <p>Rejects (chronological):
7713     * <ol>
7714     *   <li>{@code WITH RECURSIVE} —
7715     *       {@link DiagnosticCode#CTE_WITH_RECURSIVE_NOT_SUPPORTED}.
7716     *       PG / MySQL admit the parse shape but slice 106 rejects at the
7717     *       semantic layer (mirrors slice-105 boundary).</li>
7718     *   <li>Duplicate CTE name — {@link DiagnosticCode#DUPLICATE_CTE_NAME}.</li>
7719     *   <li>Forward CTE reference — {@link DiagnosticCode#CTE_FORWARD_REFERENCE}.</li>
7720     *   <li>Explicit-column-list arity mismatch — handled by
7721     *       {@link #applyExplicitCteColumnListRename} via
7722     *       {@link DiagnosticCode#CTE_EXPLICIT_COLUMN_LIST_ARITY_MISMATCH}.</li>
7723     * </ol>
7724     */
7725    private static Map<String, Integer> buildDeleteCteList(
7726            TDeleteSqlStatement delete,
7727            NameBindingProvider provider,
7728            List<StatementGraph> stmts,
7729            List<LineageEdge> lineage,
7730            Map<String, List<String>> ctePublishedColumnsOut) {
7731        TCTEList cteList = delete.getCteList();
7732        Map<String, Integer> cteNameToStatementIndex = new HashMap<>();
7733        if (cteList == null || cteList.size() == 0) {
7734            return cteNameToStatementIndex;
7735        }
7736        rejectRecursiveCtes(cteList);
7737        Set<String> allCteNames = collectCteNames(cteList);
7738        Set<String> visibleSoFar = new HashSet<>();
7739        for (int i = 0; i < cteList.size(); i++) {
7740            TCTE cte = cteList.getCTE(i);
7741            String cteName = cte.getTableName().toString();
7742            String cteNameLower = cteName.toLowerCase(Locale.ROOT);
7743            if (visibleSoFar.contains(cteNameLower)) {
7744                throw new SemanticIRBuildException(
7745                        Diagnostic.error(DiagnosticCode.DUPLICATE_CTE_NAME,
7746                        "duplicate CTE name '" + cteName
7747                                + "' in WITH clause; CTE names must be unique",
7748                        cte));
7749            }
7750            rejectForwardCteReferences(cte, allCteNames, visibleSoFar);
7751            NameBindingProvider bodyProvider =
7752                    provider.withCteContext(visibleSoFar);
7753            int lineageSize0 = lineage.size();
7754            int bodyIdx;
7755            TSelectSqlStatement cteBody = cte.getSubquery();
7756            if (cteBody != null
7757                    && cteBody.getSetOperatorType() != null
7758                    && cteBody.getSetOperatorType() != ESetOperatorType.none) {
7759                bodyIdx = buildSetOpProgram(cteBody, bodyProvider, stmts,
7760                        lineage, cteNameToStatementIndex, cteName,
7761                        /*hasOuterCteListAlreadyProcessed=*/ false);
7762                cteNameToStatementIndex.put(cteNameLower, bodyIdx);
7763            } else {
7764                int cteStmtsSize0 = stmts.size();
7765                int cteLineageSize0 = lineage.size();
7766                Map<String, Integer> cteSubqueryAliasToIndex;
7767                try {
7768                    cteSubqueryAliasToIndex =
7769                            extractFromSubqueriesAsStatements(cteBody,
7770                                    bodyProvider, stmts, lineage,
7771                                    cteNameToStatementIndex,
7772                                    ctePublishedColumnsOut);
7773                } catch (RuntimeException ex) {
7774                    while (stmts.size() > cteStmtsSize0) {
7775                        stmts.remove(stmts.size() - 1);
7776                    }
7777                    while (lineage.size() > cteLineageSize0) {
7778                        lineage.remove(lineage.size() - 1);
7779                    }
7780                    throw ex;
7781                }
7782                EnclosingScope cteEnclosing = buildEnclosingScope(cteBody,
7783                        cteNameToStatementIndex, cteSubqueryAliasToIndex,
7784                        /*parent=*/ null);
7785                Map<Integer, ScalarInfo> cteScalarMap =
7786                        extractScalarSubqueriesAsStatements(cteBody,
7787                                bodyProvider, stmts, lineage,
7788                                cteNameToStatementIndex, cteEnclosing,
7789                                /*allowRecursiveScalarSubqueryExtraction=*/ true);
7790                Map<String, List<String>> cteBodyInScope =
7791                        buildEffectiveAliasInScopeMap(cteBody, bodyProvider,
7792                                ctePublishedColumnsOut,
7793                                cteSubqueryAliasToIndex, stmts);
7794                NameBindingProvider cteBodyProviderWithStar = bodyProvider
7795                        .withInScopeRelationColumns(cteBodyInScope);
7796                // Slice 114 — switch to buildSelectStatementImpl with
7797                // snapshot/rollback (see the matching SELECT-side
7798                // CTE site for full rationale).
7799                int cteBodyStmtsSnapshot = stmts.size();
7800                int cteBodyLineageSnapshot = lineage.size();
7801                StatementGraph body;
7802                try {
7803                    body = buildSelectStatementImpl(cteBody,
7804                            cteBodyProviderWithStar, cteName,
7805                            /*hasOuterCteListAlreadyProcessed=*/ false,
7806                            /*allowFromSubqueries=*/ true,
7807                            /*allowScalarProjectionSubqueries=*/ true,
7808                            /*allowWindowProjection=*/ true,
7809                            /*allowJoinOnPredicateSubqueries=*/ false,
7810                            /*stmtsForExtraction=*/ stmts,
7811                            /*lineageForExtraction=*/ lineage,
7812                            /*cteMapForExtraction=*/ cteNameToStatementIndex,
7813                            /*isPredicateBody=*/ false,
7814                            /*whereClauseContext=*/ PredicateClauseContext.CTE_BODY_WHERE,
7815                            /*allowWherePredicateSubqueries=*/ true);
7816                } catch (RuntimeException ex) {
7817                    while (stmts.size() > cteBodyStmtsSnapshot) stmts.remove(stmts.size() - 1);
7818                    while (lineage.size() > cteBodyLineageSnapshot) lineage.remove(lineage.size() - 1);
7819                    throw ex;
7820                }
7821                bodyIdx = stmts.size();
7822                stmts.add(body);
7823                cteNameToStatementIndex.put(cteNameLower, bodyIdx);
7824                emitLineageForStatement(body, bodyIdx, lineage,
7825                        cteNameToStatementIndex, cteSubqueryAliasToIndex,
7826                        cteScalarMap);
7827            }
7828            // Slice 106 — explicit column-list rename uses dmlKind="SELECT"
7829            // so the SELECT-side CTE_EXPLICIT_COLUMN_LIST_ARITY_MISMATCH
7830            // code fires (slice-105 precedent: UPDATE/DELETE are closer
7831            // to ordinary SELECT than MERGE for CTE rename semantics).
7832            List<String> publishedCols = applyExplicitCteColumnListRename(
7833                    cte, stmts, lineage, bodyIdx, lineageSize0, "SELECT");
7834            ctePublishedColumnsOut.put(cteNameLower, publishedCols);
7835            visibleSoFar.add(cteNameLower);
7836        }
7837        return cteNameToStatementIndex;
7838    }
7839
7840    /**
7841     * Slice 105 — combine the slice-83 subqueryAliasToIndex with the
7842     * slice-105 CTE-as-FROM-relation alias→cteIdx entries so
7843     * {@link #emitUpdateSubquerySourceEdges} produces cross-stmt
7844     * lineage edges for SET RHS references resolving to a CTE column.
7845     *
7846     * <p>Without this merge the visible {@link OutputColumn#getSources}
7847     * stays correct (CTE refs surface as {@link ColumnRef}s) but
7848     * {@code lineage[]} silently loses the canonical
7849     * {@code STATEMENT_OUTPUT(update,col) → STATEMENT_OUTPUT(cte,col)}
7850     * edge (codex round-2 Q5 silent-correctness bug).
7851     *
7852     * <p>Walks {@code update.getJoins()} the same way
7853     * {@link #buildUpdateRelation} does to keep the alias resolution
7854     * identical: CTE-bound FROM-side relations are detected by their
7855     * bare name (case-insensitive) and registered under their effective
7856     * alias. Subquery aliases stay keyed lowercase to match the
7857     * slice-83 contract.
7858     */
7859    private static Map<String, Integer> buildUpdateCombinedAliasToSubIdx(
7860            TUpdateSqlStatement update,
7861            Map<String, Integer> subqueryAliasToIndex,
7862            Map<String, Integer> cteNameToStatementIndex) {
7863        Map<String, Integer> combined = new HashMap<>();
7864        if (subqueryAliasToIndex != null) {
7865            combined.putAll(subqueryAliasToIndex);
7866        }
7867        if (cteNameToStatementIndex == null
7868                || cteNameToStatementIndex.isEmpty()) {
7869            return combined;
7870        }
7871        TJoinList joins = update.getJoins();
7872        if (joins == null) return combined;
7873        for (TJoin join : joins) {
7874            addCteAliasToCombinedMap(join.getTable(),
7875                    cteNameToStatementIndex, combined);
7876            TJoinItemList items = join.getJoinItems();
7877            if (items == null) continue;
7878            for (int i = 0; i < items.size(); i++) {
7879                TJoinItem item = items.getJoinItem(i);
7880                if (item == null) continue;
7881                addCteAliasToCombinedMap(item.getTable(),
7882                        cteNameToStatementIndex, combined);
7883            }
7884        }
7885        return combined;
7886    }
7887
7888    private static void addCteAliasToCombinedMap(TTable t,
7889            Map<String, Integer> cteNameToStatementIndex,
7890            Map<String, Integer> combined) {
7891        if (t == null) return;
7892        if (t.getTableType() != gudusoft.gsqlparser.ETableSource.objectname) {
7893            return;
7894        }
7895        TObjectName tName = t.getTableName();
7896        if (tName == null) return;
7897        String bare = tName.toString();
7898        if (bare == null || bare.isEmpty()) return;
7899        String bareLower = bare.toLowerCase(Locale.ROOT);
7900        Integer cteIdx = cteNameToStatementIndex.get(bareLower);
7901        if (cteIdx == null) return;
7902        String aliasKey = effectiveAliasLowerCaseOrNull(t);
7903        if (aliasKey == null) aliasKey = bareLower;
7904        combined.put(aliasKey, cteIdx);
7905    }
7906
7907    /**
7908     * Slice 102 / Slice 103 — when a WITH-clause CTE declares an explicit
7909     * column list ({@code WITH cte(a, b) AS (SELECT x, y FROM t)}), rebuild
7910     * {@code stmts[bodyIdx]} so its {@link OutputColumn} names match the
7911     * explicit list at each ordinal and rewrite outgoing
7912     * {@link LineageRef.Kind#STATEMENT_OUTPUT} refs in
7913     * {@code lineage[lineageSize0..lineage.size())} so the inner-projection
7914     * names are replaced by the explicit-list names.
7915     *
7916     * <p>Returns the published column list for the caller's
7917     * {@code ctePublishedColumns} map: the renamed list when an explicit list
7918     * is present; otherwise the body's inner names (matching pre-slice-102
7919     * behavior). Slice 103 reuses this helper from the outer SELECT CTE
7920     * walker via {@code dmlKind="SELECT"} (slice-100 cross-DML reuse
7921     * precedent).
7922     *
7923     * <p>Rejects:
7924     * <ul>
7925     *   <li>Arity mismatch — explicit-list size != body output count →
7926     *       {@link DiagnosticCode#MERGE_CTE_EXPLICIT_COLUMN_LIST_ARITY_MISMATCH}
7927     *       when {@code dmlKind="MERGE"}, otherwise
7928     *       {@link DiagnosticCode#CTE_EXPLICIT_COLUMN_LIST_ARITY_MISMATCH}.
7929     *       Slice 103 cannot rename the MERGE-side code (it is pinned by
7930     *       {@code Slice102Test.valueOfPinsResolveBothCodes} and adopting
7931     *       it on the SELECT side would also miswire the message text);
7932     *       the SELECT-side gets its own parallel code (codex round-1
7933     *       plan-review BLOCKING).</li>
7934     *   <li>Duplicate explicit name ({@code WITH cte(a, a) AS ...}) →
7935     *       {@link DiagnosticCode#DUPLICATE_OUTPUT_NAME}. STATEMENT_OUTPUT
7936     *       refs are keyed by output name; duplicates would collide
7937     *       (codex round-2 plan-review advisory).</li>
7938     * </ul>
7939     *
7940     * <p>{@link OutputColumn} and {@link StatementGraph} are immutable; the
7941     * rebuild uses the slice-85 15-arg primary constructor copying every
7942     * field unchanged except {@code outputColumns}. {@link LineageEdge} and
7943     * {@link LineageRef} are immutable; the rewrite walker constructs new
7944     * instances and replaces them in the mutable {@code lineage} list via
7945     * {@link List#set}.
7946     */
7947    private static List<String> applyExplicitCteColumnListRename(
7948            TCTE cte,
7949            List<StatementGraph> stmts,
7950            List<LineageEdge> lineage,
7951            int bodyIdx,
7952            int lineageSize0,
7953            String dmlKind) {
7954        StatementGraph body = stmts.get(bodyIdx);
7955        if (cte.getColumnList() == null || cte.getColumnList().size() == 0) {
7956            return outputColumnNames(body);
7957        }
7958        // Materialize the explicit list of renamed names (in declaration order).
7959        boolean isMerge = "MERGE".equals(dmlKind);
7960        String dmlLabel = isMerge ? "MERGE CTE" : "CTE";
7961        String withClauseLabel = isMerge ? "MERGE WITH clause CTE" : "WITH clause CTE";
7962        List<String> renamed = new ArrayList<>(cte.getColumnList().size());
7963        Set<String> seenLower = new HashSet<>();
7964        for (int k = 0; k < cte.getColumnList().size(); k++) {
7965            TObjectName col = cte.getColumnList().getObjectName(k);
7966            String name = (col == null) ? null : col.getColumnNameOnly();
7967            if (name == null || name.isEmpty()) {
7968                // Defensive — parser normally fills these; if not, fall
7969                // back to a synthetic name so the constructor invariant
7970                // (non-empty name) holds, and the arity check still works.
7971                name = "col" + (k + 1);
7972            }
7973            String lower = name.toLowerCase(Locale.ROOT);
7974            if (!seenLower.add(lower)) {
7975                throw new SemanticIRBuildException(Diagnostic.error(
7976                        DiagnosticCode.DUPLICATE_OUTPUT_NAME,
7977                        "duplicate column name '" + name + "' in " + dmlLabel + " '"
7978                                + cte.getTableName()
7979                                + "' explicit column list; output names must "
7980                                + "be unique within a CTE published column list",
7981                        cte));
7982            }
7983            renamed.add(name);
7984        }
7985        List<OutputColumn> bodyOutputs = body.getOutputColumns();
7986        if (bodyOutputs.size() != renamed.size()) {
7987            DiagnosticCode arityCode = isMerge
7988                    ? DiagnosticCode.MERGE_CTE_EXPLICIT_COLUMN_LIST_ARITY_MISMATCH
7989                    : DiagnosticCode.CTE_EXPLICIT_COLUMN_LIST_ARITY_MISMATCH;
7990            throw new SemanticIRBuildException(Diagnostic.error(
7991                    arityCode,
7992                    withClauseLabel + " '" + cte.getTableName()
7993                            + "' declares " + renamed.size()
7994                            + " explicit column(s) but the body's SELECT "
7995                            + "publishes " + bodyOutputs.size() + " column(s); "
7996                            + "the explicit list must have exactly one entry "
7997                            + "per body output column",
7998                    cte));
7999        }
8000        // Capture the old → new name mapping by ordinal BEFORE building the
8001        // new OutputColumns, so the lineage rewrite can look up the
8002        // substitution by old (inner) name. Codex round-1 diff-review
8003        // (non-blocking → upgraded to defensive guard): if the body has
8004        // duplicate inner output names (e.g. `SELECT id, id`), name-keyed
8005        // rewrite collapses both old refs to the last mapping and
8006        // produces wrong lineage. The IR contract already states output
8007        // names must be unique (see DUPLICATE_OUTPUT_NAME javadoc and the
8008        // line-4378 scalar-subquery guard) but is not enforced
8009        // generically. Reject here so explicit-rename paths cannot
8010        // silently break lineage.
8011        Set<String> seenInnerLower = new HashSet<>();
8012        for (OutputColumn oc : bodyOutputs) {
8013            String n = oc.getName();
8014            if (n == null || n.isEmpty()) continue;
8015            String lower = n.toLowerCase(Locale.ROOT);
8016            if (!seenInnerLower.add(lower)) {
8017                throw new SemanticIRBuildException(Diagnostic.error(
8018                        DiagnosticCode.DUPLICATE_OUTPUT_NAME,
8019                        dmlLabel + " '" + cte.getTableName()
8020                                + "' body publishes duplicate inner column "
8021                                + "name '" + n + "'; the explicit column "
8022                                + "list rename requires unique inner names "
8023                                + "because lineage refs are keyed by output "
8024                                + "name and would collide",
8025                        cte));
8026            }
8027        }
8028        Map<String, String> oldToNewLower = new HashMap<>();
8029        List<OutputColumn> newOutputs = new ArrayList<>(bodyOutputs.size());
8030        for (int k = 0; k < bodyOutputs.size(); k++) {
8031            OutputColumn oc = bodyOutputs.get(k);
8032            String oldName = oc.getName();
8033            String newName = renamed.get(k);
8034            if (oldName != null && !oldName.isEmpty()) {
8035                oldToNewLower.put(oldName.toLowerCase(Locale.ROOT), newName);
8036            }
8037            newOutputs.add(new OutputColumn(newName, oc.isDerived(),
8038                    oc.isAggregate(), oc.getSources(), oc.getWindowSpec()));
8039        }
8040        // Rebuild the body's StatementGraph using the slice-85 15-arg primary
8041        // constructor — copies every field (including the slice-85
8042        // returningColumns slot per codex round-1 plan-review BLOCKING)
8043        // except outputColumns.
8044        StatementGraph renamedBody = new StatementGraph(
8045                body.getName(), body.getKind(), body.getRelations(),
8046                newOutputs, body.getReturningColumns(),
8047                body.getFilterColumnRefs(), body.getJoinColumnRefs(),
8048                body.getGroupByColumnRefs(), body.getHavingColumnRefs(),
8049                body.getOrderByColumnRefs(), body.getDistinctOnColumnRefs(),
8050                body.isDistinct(), body.getSetOperator(), body.getRowLimit(),
8051                body.getTarget());
8052        stmts.set(bodyIdx, renamedBody);
8053        // Rewrite outgoing STATEMENT_OUTPUT refs in the window. Both `from`
8054        // and `to` are checked because edges can place the body-output ref
8055        // on either side (producer-side: from=TABLE_COLUMN, to=STATEMENT_OUTPUT;
8056        // consumer-side from a deeper inner stmt: from=STATEMENT_OUTPUT,
8057        // to=STATEMENT_OUTPUT — neither shape today places bodyIdx on
8058        // `from` for THIS body, but the symmetric check is cheap and
8059        // future-proof). LineageRef and LineageEdge are immutable, so new
8060        // instances are constructed and `lineage.set` replaces in place.
8061        for (int idx = lineageSize0; idx < lineage.size(); idx++) {
8062            LineageEdge edge = lineage.get(idx);
8063            LineageRef from = edge.getFrom();
8064            LineageRef to = edge.getTo();
8065            LineageRef newFrom = maybeRewriteStatementOutputRef(
8066                    from, bodyIdx, oldToNewLower);
8067            LineageRef newTo = maybeRewriteStatementOutputRef(
8068                    to, bodyIdx, oldToNewLower);
8069            if (newFrom != from || newTo != to) {
8070                lineage.set(idx, new LineageEdge(newFrom, newTo));
8071            }
8072        }
8073        return Collections.unmodifiableList(renamed);
8074    }
8075
8076    /**
8077     * Slice 113 — copy a {@link StatementGraph} with a new {@code name}
8078     * field. Every other field is preserved verbatim. Used by the
8079     * set-op branch loop to assign the synthetic
8080     * {@code <set_op_branch_<idx>>} name AFTER the branch build, in case
8081     * the branch's WHERE-side predicate-subquery extraction
8082     * (slice 113 via {@link PredicateClauseContext#SET_OP_BRANCH_WHERE})
8083     * appended predicate-body statements to {@code stmts}, which would
8084     * otherwise leave the pre-computed digit suffix lagging behind the
8085     * branch's final position.
8086     *
8087     * <p>The rebuild is purely cosmetic on the {@link StatementGraph#getName()}
8088     * field. No {@link LineageRef} is affected because all lineage refs
8089     * are idx-based (see {@link LineageRef#statementOutput(int, String)}),
8090     * not name-based. {@code outputColumns}, {@code relations},
8091     * {@code filterColumnRefs}, {@code joinColumnRefs} and every other
8092     * field are reused unchanged.
8093     */
8094    private static StatementGraph withRenamedTo(StatementGraph s, String newName) {
8095        return new StatementGraph(newName, s.getKind(),
8096                s.getRelations(), s.getOutputColumns(), s.getReturningColumns(),
8097                s.getFilterColumnRefs(), s.getJoinColumnRefs(),
8098                s.getGroupByColumnRefs(), s.getHavingColumnRefs(),
8099                s.getOrderByColumnRefs(), s.getDistinctOnColumnRefs(),
8100                s.isDistinct(), s.getSetOperator(), s.getRowLimit(),
8101                s.getTarget());
8102    }
8103
8104    /**
8105     * Slice 102 — return a new STATEMENT_OUTPUT {@link LineageRef} with the
8106     * output name substituted when {@code ref} targets {@code bodyIdx} and
8107     * its current output name is a key in {@code oldToNewLower}. Otherwise
8108     * return {@code ref} unchanged (identity-comparable so the caller can
8109     * skip the {@code lineage.set} for no-op rewrites).
8110     */
8111    private static LineageRef maybeRewriteStatementOutputRef(
8112            LineageRef ref, int bodyIdx,
8113            Map<String, String> oldToNewLower) {
8114        if (ref == null) return null;
8115        if (ref.getKind() != LineageRef.Kind.STATEMENT_OUTPUT) return ref;
8116        if (ref.getStatementIndex() != bodyIdx) return ref;
8117        String oldName = ref.getOutputName();
8118        if (oldName == null || oldName.isEmpty()) return ref;
8119        String newName = oldToNewLower.get(oldName.toLowerCase(Locale.ROOT));
8120        if (newName == null) return ref;
8121        return LineageRef.statementOutput(bodyIdx, newName);
8122    }
8123
8124    /**
8125     * Emit one lineage edge per (output, source) pair. Edges target a
8126     * {@link LineageRef.Kind#STATEMENT_OUTPUT} when the source's relation
8127     * is a CTE or a FROM-clause subquery, or a
8128     * {@link LineageRef.Kind#TABLE_COLUMN} when it's a base table.
8129     * Multi-source derived columns produce one edge per source.
8130     *
8131     * <p>{@code subqueryAliasToStatementIndex} is statement-local; the
8132     * caller supplies the alias map for this statement's own FROM list.
8133     * That avoids cross-scope alias collisions.
8134     */
8135    private static void emitLineageForStatement(StatementGraph stmt,
8136                                                int statementIndex,
8137                                                List<LineageEdge> lineage,
8138                                                Map<String, Integer> cteNameToStatementIndex,
8139                                                Map<String, Integer> subqueryAliasToStatementIndex,
8140                                                Map<Integer, ScalarInfo> ordinalToScalarInfo) {
8141        // Slice 87: lowercase alias keys so SQL identifiers written with
8142        // different casing in the FROM clause vs. SELECT qualifier resolve
8143        // correctly (e.g. `SELECT t.name FROM employees T`). Mirrors the
8144        // same fix in emitUpdateSubquerySourceEdges (slice 83). When two
8145        // relations collide after lowercasing (unusual, but not guaranteed
8146        // caught by the duplicate-alias preflight in all call paths per
8147        // codex Q1 advisory), last-write-wins — the same policy as slice 83.
8148        Map<String, RelationSource> aliasToRelation = new HashMap<>();
8149        for (RelationSource r : stmt.getRelations()) {
8150            String key = r.getAlias();
8151            if (key == null || key.isEmpty()) continue;
8152            aliasToRelation.put(key.toLowerCase(Locale.ROOT), r);
8153        }
8154        List<OutputColumn> outputs = stmt.getOutputColumns();
8155        for (int outOrdinal = 0; outOrdinal < outputs.size(); outOrdinal++) {
8156            OutputColumn out = outputs.get(outOrdinal);
8157            // Slice 11: scalar-subquery projections have empty sources
8158            // by construction; their lineage edge is a single
8159            // STATEMENT_OUTPUT → STATEMENT_OUTPUT pointing at the
8160            // extracted scalar body's only output. Emit it once and
8161            // skip the per-source loop (which would be a no-op anyway).
8162            ScalarInfo scalar = ordinalToScalarInfo.get(outOrdinal);
8163            if (scalar != null) {
8164                lineage.add(new LineageEdge(
8165                        LineageRef.statementOutput(statementIndex, out.getName()),
8166                        LineageRef.statementOutput(scalar.statementIndex,
8167                                scalar.innerOutputName)));
8168                continue;
8169            }
8170            for (ColumnRef src : out.getSources()) {
8171                String srcAlias = src.getRelationAlias();
8172                RelationSource rel = aliasToRelation.get(
8173                        srcAlias == null ? null : srcAlias.toLowerCase(Locale.ROOT));
8174                if (rel == null) {
8175                    throw new SemanticIRBuildException(
8176                            Diagnostic.error(DiagnosticCode.OUTPUT_REFERENCES_UNKNOWN_RELATION,
8177                            "output '" + out.getName() + "' references unknown relation '"
8178                                    + src.getRelationAlias() + "'", null));
8179                }
8180                LineageRef from = LineageRef.statementOutput(statementIndex, out.getName());
8181                LineageRef to;
8182                // Slice 15: resolved-kind dispatch. For OUTER_REFERENCE
8183                // bindings the underlying outerKind decides which
8184                // table-column or statement-output edge we emit.
8185                // Codex round-1 MUST 2 / round-2 MUST 1: exhaustive
8186                // dispatch instead of catch-all.
8187                RelationKind kind = rel.getBinding().getKind();
8188                RelationKind resolvedKind = (kind == RelationKind.OUTER_REFERENCE)
8189                        ? rel.getBinding().getOuterKind()
8190                        : kind;
8191                if (resolvedKind == RelationKind.CTE) {
8192                    Integer cteIndex = cteNameToStatementIndex.get(
8193                            rel.getBinding().getQualifiedName().toLowerCase(Locale.ROOT));
8194                    if (cteIndex == null) {
8195                        throw new SemanticIRBuildException(
8196                                Diagnostic.error(DiagnosticCode.CTE_BODY_MISSING,
8197                                "CTE '" + rel.getBinding().getQualifiedName() + "' has no body statement", null));
8198                    }
8199                    to = LineageRef.statementOutput(cteIndex, src.getColumnName());
8200                } else if (resolvedKind == RelationKind.SUBQUERY) {
8201                    Integer subIndex = subqueryAliasToStatementIndex.get(
8202                            rel.getAlias().toLowerCase(Locale.ROOT));
8203                    if (subIndex == null) {
8204                        throw new SemanticIRBuildException(
8205                                Diagnostic.error(DiagnosticCode.FROM_SUBQUERY_BINDING_UNRESOLVED,
8206                                "FROM-clause subquery '" + rel.getAlias()
8207                                        + "' has no body statement registered", null));
8208                    }
8209                    to = LineageRef.statementOutput(subIndex, src.getColumnName());
8210                } else if (resolvedKind == RelationKind.TABLE) {
8211                    to = LineageRef.tableColumn(
8212                            rel.getBinding().getQualifiedName(),
8213                            src.getColumnName());
8214                } else {
8215                    throw new SemanticIRBuildException(
8216                            Diagnostic.error(DiagnosticCode.OUTPUT_REFERENCES_UNSUPPORTED_BINDING_KIND,
8217                            "output '" + out.getName()
8218                                    + "' references relation '" + rel.getAlias()
8219                                    + "' with unsupported binding kind " + kind
8220                                    + (kind == RelationKind.OUTER_REFERENCE
8221                                        ? " (outerKind=" + rel.getBinding().getOuterKind() + ")"
8222                                        : ""), null));
8223                }
8224                lineage.add(new LineageEdge(from, to));
8225            }
8226        }
8227    }
8228
8229    /**
8230     * Build one SELECT statement (CTE body or outer). The {@code name}
8231     * argument is non-null for a CTE body, null otherwise. When
8232     * {@code hasOuterCteListAlreadyProcessed} is true, the SELECT's own
8233     * {@code getCteList()} is not rejected because the caller has already
8234     * extracted those CTEs into separate statements; in all other cases a
8235     * non-empty CTE list on this node is rejected (so nested WITH inside
8236     * a CTE body does not silently slip through).
8237     */
8238    private static StatementGraph buildSelectStatement(TSelectSqlStatement select,
8239                                                       NameBindingProvider provider,
8240                                                       String name,
8241                                                       boolean hasOuterCteListAlreadyProcessed,
8242                                                       boolean allowFromSubqueries,
8243                                                       boolean allowScalarProjectionSubqueries,
8244                                                       boolean allowWindowProjection) {
8245        // Slice 23: legacy 7-arg call site. Predicate-subquery extraction is
8246        // disabled (allowJoinOnPredicateSubqueries=false) and this is not a
8247        // predicate body itself (isPredicateBody=false). All non-outer call
8248        // sites use this overload — the slice-17 `rejectSubqueriesInJoinOn`
8249        // continues to fire at every non-outer JOIN-ON site.
8250        return buildSelectStatementImpl(select, provider, name,
8251                hasOuterCteListAlreadyProcessed,
8252                allowFromSubqueries,
8253                allowScalarProjectionSubqueries,
8254                allowWindowProjection,
8255                /*allowJoinOnPredicateSubqueries=*/ false,
8256                /*stmtsForExtraction=*/ null,
8257                /*lineageForExtraction=*/ null,
8258                /*cteMapForExtraction=*/ null,
8259                /*isPredicateBody=*/ false,
8260                /*whereClauseContext=*/ PredicateClauseContext.SELECT_WHERE,
8261                /*allowWherePredicateSubqueries=*/ false);
8262    }
8263
8264    /**
8265     * Internal body shared between the legacy 7-arg overload and the
8266     * outer-SELECT entry point used by {@link #build}. Slice 23 added two new
8267     * concepts; slice 24 added one more.
8268     * <ul>
8269     *   <li>{@code allowJoinOnPredicateSubqueries} + {@code stmts}/{@code lineage}
8270     *       — when {@code allow...} is {@code true}, JOIN-ON uncorrelated
8271     *       EXISTS subqueries are extracted as their own
8272     *       {@code <predicate_subquery_<i>>} statements appended to
8273     *       {@code stmts} (slice-11/12 synthetic-name pattern). Outer-SELECT
8274     *       entry only.</li>
8275     *   <li>{@code isPredicateBody} — when {@code true}, this statement IS
8276     *       the inner SELECT of an extracted EXISTS body. The constant-only
8277     *       projection rejection in {@link #buildOutputColumns} is bypassed
8278     *       and a single synthetic OutputColumn is emitted in its place.</li>
8279     *   <li>{@code cteMapForExtraction} (slice 24) — outer's CTE
8280     *       name-to-statement-index map, plumbed in only when
8281     *       {@code allowJoinOnPredicateSubqueries=true}. Required so the
8282     *       slice-24 column-bearing inner projection can emit
8283     *       STATEMENT_OUTPUT → STATEMENT_OUTPUT lineage edges into outer-
8284     *       visible CTE bodies. Non-outer call sites pass {@code null}.</li>
8285     * </ul>
8286     */
8287    private static StatementGraph buildSelectStatementImpl(
8288            TSelectSqlStatement select,
8289            NameBindingProvider provider,
8290            String name,
8291            boolean hasOuterCteListAlreadyProcessed,
8292            boolean allowFromSubqueries,
8293            boolean allowScalarProjectionSubqueries,
8294            boolean allowWindowProjection,
8295            boolean allowJoinOnPredicateSubqueries,
8296            List<StatementGraph> stmtsForExtraction,
8297            List<LineageEdge> lineageForExtraction,
8298            Map<String, Integer> cteMapForExtraction,
8299            boolean isPredicateBody,
8300            PredicateClauseContext whereClauseContext,
8301            boolean allowWherePredicateSubqueries) {
8302        rejectUnsupportedShape(select, hasOuterCteListAlreadyProcessed);
8303        boolean distinct = resolveDistinctFlag(select);
8304        // Slice 65 — reset using scope at entry so a parent SELECT's
8305        // scope cannot leak into recursive nested builds. The using
8306        // scope for THIS SELECT is installed only AFTER buildRelations
8307        // completes (see below) so the predicate-subquery extraction
8308        // walk inside buildRelations does not inherit the outer scope
8309        // (codex slice-65 diff-review round-1 P2 #1: an inner
8310        // {@code EXISTS (SELECT SUM(x.v) FILTER (WHERE k > 0) FROM x)}
8311        // would have its bare `k` expand to outer's merged sources,
8312        // causing a valid uncorrelated body to be rejected as
8313        // correlated). The slice-64 → 65 JOIN-ON merged-key reject
8314        // also runs BEFORE buildRelations so ON-clause refs aren't
8315        // collected with a stale or future scope.
8316        provider = provider.withUsingScope(UsingScope.EMPTY);
8317        rejectUnqualifiedMergedKeyInJoinOn(select, provider);
8318        List<ColumnRef> joinRefs = new ArrayList<>();
8319        List<RelationSource> relations;
8320        if (isSetOpBranchSyntheticName(name)
8321                && hasNoFromSource(select)
8322                && allResultColumnsAreConstantExpressions(select)) {
8323            // Slice 61: allow FROM-less constant-only set-op branches
8324            // such as SELECT 1 UNION ALL SELECT 2. The general SELECT
8325            // boundary remains unchanged: non-branch SELECT 1 still
8326            // fails in buildRelations with "must have at least one
8327            // FROM source".
8328            relations = Collections.emptyList();
8329        } else {
8330            relations = buildRelations(select, provider, joinRefs,
8331                    allowFromSubqueries,
8332                    allowJoinOnPredicateSubqueries,
8333                    stmtsForExtraction, lineageForExtraction, cteMapForExtraction);
8334        }
8335        // Slice 65 — install this SELECT's own using scope AFTER
8336        // buildRelations / predicate-subquery extraction. From here
8337        // forward the clause collectors (output / filter / groupBy /
8338        // having / orderBy) see the merged-key scope for THIS SELECT.
8339        UsingScope ownScope = buildUsingScope(select, provider);
8340        if (!ownScope.isEmpty()) {
8341            provider = provider.withUsingScope(ownScope);
8342        }
8343        List<OutputColumn> outputColumns = buildOutputColumns(select, provider,
8344                allowScalarProjectionSubqueries, allowWindowProjection,
8345                isPredicateBody, name);
8346        // Slice 112 — thread the SELECT path's outer extraction context
8347        // through buildFilterColumnRefs so top-level SELECT WHERE can
8348        // lift uncorrelated predicate-subquery wrappers via the
8349        // slice-23+ extraction pipeline (PredicateClauseContext.SELECT_WHERE).
8350        // Slice 113 — the same threading extends to set-op branch WHERE
8351        // via PredicateClauseContext.SET_OP_BRANCH_WHERE, distinguished
8352        // only by clauseLabel for diagnostic messages (codes are shared).
8353        //
8354        // {@code allowWherePredicateSubqueries} is INDEPENDENT of
8355        // {@code allowJoinOnPredicateSubqueries} (slice 113 split):
8356        // set-op branches admit WHERE-side predicate subqueries while
8357        // KEEPING JOIN-ON predicate subqueries rejected (slice 23 / 26
8358        // contract — pinned by Slice23Test#existsInSetOpBranchJoinOnStillRejected
8359        // and Slice26Test#lhsSubqueryInSetOpBranchRejected). Nested
8360        // SELECTs without extraction context
8361        // (allowWherePredicateSubqueries=false) keep the slice-80
8362        // blanket reject inside buildFilterColumnRefs.
8363        List<ColumnRef> filterRefs = buildFilterColumnRefs(select, provider,
8364                allowWherePredicateSubqueries,
8365                stmtsForExtraction, lineageForExtraction, cteMapForExtraction,
8366                whereClauseContext);
8367        List<ColumnRef> groupByRefs = buildGroupByColumnRefs(select, provider);
8368        List<ColumnRef> havingRefs = buildHavingColumnRefs(select, provider);
8369        List<ColumnRef> orderByRefs = buildOrderByColumnRefs(select, provider, outputColumns);
8370        // Slice 73: DISTINCT ON refs collected here so they observe the
8371        // same {@code provider} (with UsingScope already installed) used
8372        // by buildGroupByColumnRefs / buildHavingColumnRefs /
8373        // buildOrderByColumnRefs. This keeps `DISTINCT ON (k)` over
8374        // `JOIN ... USING (k)` consistent with slice-65 merged-key
8375        // semantics and prevents parent-scope leakage into nested
8376        // builds.
8377        List<ColumnRef> distinctOnRefs = buildDistinctOnColumnRefs(select, provider);
8378        RowLimit rowLimit = buildRowLimit(select);
8379        return new StatementGraph(name, "SELECT", relations, outputColumns,
8380                filterRefs, joinRefs, groupByRefs, havingRefs, orderByRefs,
8381                distinctOnRefs,
8382                distinct,
8383                /*setOperator=*/ null,
8384                rowLimit);
8385    }
8386
8387    /**
8388     * Slices 70 and 71: build per-statement row-limit metadata from
8389     * {@code TLimitClause}, {@code TTopClause}, {@code TOffsetClause},
8390     * or {@code TFetchFirstClause}. Returns {@code null} when no
8391     * row-limit clause is present. All admit / reject decisions for
8392     * single-SELECT row-limit clauses live here; the set-op outer
8393     * row-limit path is rejected separately by
8394     * {@link #rejectSetOpRowLimit} (slice 72 lifts).
8395     *
8396     * <h4>Admitted shapes</h4>
8397     * <ul>
8398     *   <li>{@link RowLimitKind#LIMIT} — {@code TLimitClause} with
8399     *       non-null {@code getRow_count()}. Offset is populated when
8400     *       {@code TLimitClause.getOffset() != null} (PG / MySQL /
8401     *       SQLite / BigQuery / Snowflake / Redshift inline
8402     *       {@code LIMIT N OFFSET M}, MySQL old-style {@code LIMIT M, N},
8403     *       Informix {@code SKIP m LIMIT n}).</li>
8404     *   <li>{@link RowLimitKind#FETCH_FIRST} — {@code TLimitClause} with
8405     *       non-null {@code getSelectFetchFirstValue()} (PG
8406     *       {@code FETCH FIRST}, Informix {@code FIRST n}). Offset is
8407     *       populated when present (PG
8408     *       {@code OFFSET m FETCH FIRST n}, Informix
8409     *       {@code SKIP m FIRST n}). Also fires for
8410     *       {@code TFetchFirstClause} with non-null
8411     *       {@code getFetchValue()} (Oracle / SQL Server
8412     *       {@code FETCH FIRST/NEXT N ROWS ONLY}) when no
8413     *       {@code TOffsetClause} is present.</li>
8414     *   <li>{@link RowLimitKind#TOP} — {@code TTopClause} with non-null
8415     *       {@code getExpr()} and neither {@code isPercent()} nor
8416     *       {@code isWithties()} set. SQL Server {@code SELECT TOP N}.</li>
8417     *   <li>{@link RowLimitKind#OFFSET_FETCH} — Oracle / SQL Server
8418     *       {@code OFFSET m ROWS [FETCH NEXT n ROWS ONLY]} routed via
8419     *       the dedicated {@code TOffsetClause} + {@code TFetchFirstClause}
8420     *       pair, and PG offset-only {@code OFFSET m} routed via
8421     *       {@code TLimitClause.getOffset()} when {@code row_count} and
8422     *       {@code selectFetchFirstValue} are both null.
8423     *       {@link RowLimit#getCount()} may be {@code null} for
8424     *       offset-only forms.</li>
8425     * </ul>
8426     *
8427     * <h4>Rejects</h4>
8428     * <ul>
8429     *   <li>{@link DiagnosticCode#ROW_LIMIT_TOP_PERCENT_NOT_SUPPORTED}
8430     *       — {@code TOP N PERCENT}. The sampling semantics differ from
8431     *       fixed-row {@code LIMIT} enough to warrant a dedicated slice.</li>
8432     *   <li>{@link DiagnosticCode#ROW_LIMIT_TOP_WITH_TIES_NOT_SUPPORTED}
8433     *       — {@code TOP N WITH TIES}. Requires modeling the ORDER BY
8434     *       tie-handling interaction; deferred.</li>
8435     *   <li>{@link DiagnosticCode#ROW_LIMIT_HIVE_LIMIT_GRAMMAR_QUIRK} —
8436     *       Hive single-argument {@code LIMIT N} parser routes the
8437     *       count through {@code TLimitClause.getOffset()} with
8438     *       {@code row_count == null}, which is indistinguishable at
8439     *       the AST level from PG offset-only {@code OFFSET m}. Pinning
8440     *       this with a vendor-specific guard prevents emitting
8441     *       semantically-wrong {@code OFFSET_FETCH} metadata for what
8442     *       the SQL author wrote as a LIMIT. A future grammar fix
8443     *       should route the count through {@code getRow_count()}; this
8444     *       guard can be removed then.</li>
8445     *   <li>{@link DiagnosticCode#ROW_LIMIT_LIMIT_NOT_SUPPORTED} —
8446     *       Vertica TIMESERIES windowing on {@code TLimitClause}
8447     *       ({@code getWindowDef() != null}). Defensive; not modeled.</li>
8448     *   <li>{@link DiagnosticCode#ROW_LIMIT_COUNT_UNRESOLVED} — the
8449     *       parser constructed a row-limit clause node but did not
8450     *       populate any count slot:
8451     *       <ul>
8452     *         <li>{@code TLimitClause} with {@code row_count},
8453     *             {@code selectFetchFirstValue}, and {@code offset} all
8454     *             null (defensive; not observed in probe runs).</li>
8455     *         <li>{@code TFetchFirstClause} with null fetchValue —
8456     *             ANSI / DB2 grammar incompleteness (the parser
8457     *             constructs the clause node but does not populate the
8458     *             count). Future grammar fix can lift this.</li>
8459     *         <li>{@code TTopClause} with null expression (defensive).</li>
8460     *       </ul></li>
8461     * </ul>
8462     */
8463    private static RowLimit buildRowLimit(TSelectSqlStatement select) {
8464        TLimitClause limit = select.getLimitClause();
8465        if (limit != null) {
8466            // Vertica TIMESERIES window on TLimitClause — defensive; rare.
8467            // Pre-empts the row_count / fff branches because the windowed
8468            // form is its own semantic surface.
8469            if (limit.getWindowDef() != null) {
8470                throw new SemanticIRBuildException(
8471                        Diagnostic.error(DiagnosticCode.ROW_LIMIT_LIMIT_NOT_SUPPORTED,
8472                        "row-limit clause LIMIT with Vertica TIMESERIES window "
8473                                + "is not supported yet", limit));
8474            }
8475
8476            TExpression rc = limit.getRow_count();
8477            TExpression off = limit.getOffset();
8478            TExpression fff = limit.getSelectFetchFirstValue();
8479
8480            // Hive single-argument LIMIT parser quirk: the count ends
8481            // up on offset with row_count=null. Vendor-conditional
8482            // because the same AST shape is legitimate PG offset-only.
8483            if (select.dbvendor == EDbVendor.dbvhive
8484                    && rc == null && off != null && fff == null) {
8485                throw new SemanticIRBuildException(
8486                        Diagnostic.error(DiagnosticCode.ROW_LIMIT_HIVE_LIMIT_GRAMMAR_QUIRK,
8487                        "Hive single-argument LIMIT N is currently mis-routed "
8488                                + "by the parser (count appears on TLimitClause.getOffset() "
8489                                + "with row_count=null); fix the Hive grammar to route "
8490                                + "the count through getRow_count() to lift this guard", limit));
8491            }
8492
8493            if (rc != null) {
8494                // LIMIT N with optional OFFSET M (PG/MySQL/SQLite/
8495                // BigQuery/Snowflake/Redshift inline LIMIT-OFFSET,
8496                // MySQL old-style LIMIT M,N, Informix SKIP m LIMIT n).
8497                return new RowLimit(RowLimitKind.LIMIT,
8498                        rc.toString(),
8499                        off != null ? off.toString() : null);
8500            }
8501            if (fff != null) {
8502                // FETCH FIRST via the PG/Informix routing through
8503                // TLimitClause, with optional OFFSET (PG
8504                // OFFSET m FETCH FIRST n; Informix SKIP m FIRST n).
8505                return new RowLimit(RowLimitKind.FETCH_FIRST,
8506                        fff.toString(),
8507                        off != null ? off.toString() : null);
8508            }
8509            if (off != null) {
8510                // Offset-only via TLimitClause (PG OFFSET m [ROWS]).
8511                return new RowLimit(RowLimitKind.OFFSET_FETCH,
8512                        /*count=*/ null,
8513                        off.toString());
8514            }
8515            // Defensive: TLimitClause present with all four slots null.
8516            throw new SemanticIRBuildException(
8517                    Diagnostic.error(DiagnosticCode.ROW_LIMIT_COUNT_UNRESOLVED,
8518                    "row-limit clause LIMIT is present but no count, offset, "
8519                            + "or FETCH FIRST value is populated on the parser AST", limit));
8520        }
8521
8522        TTopClause top = select.getTopClause();
8523        if (top != null) {
8524            if (top.isPercent()) {
8525                throw new SemanticIRBuildException(
8526                        Diagnostic.error(DiagnosticCode.ROW_LIMIT_TOP_PERCENT_NOT_SUPPORTED,
8527                        "row-limit clause TOP N PERCENT is not supported yet; "
8528                                + "sampling semantics warrant a dedicated slice", top));
8529            }
8530            if (top.isWithties()) {
8531                throw new SemanticIRBuildException(
8532                        Diagnostic.error(DiagnosticCode.ROW_LIMIT_TOP_WITH_TIES_NOT_SUPPORTED,
8533                        "row-limit clause TOP N WITH TIES is not supported yet; "
8534                                + "tie-handling semantics warrant a dedicated slice", top));
8535            }
8536            TExpression e = top.getExpr();
8537            if (e == null) {
8538                throw new SemanticIRBuildException(
8539                        Diagnostic.error(DiagnosticCode.ROW_LIMIT_COUNT_UNRESOLVED,
8540                        "row-limit clause TOP is present but the count expression "
8541                                + "is not populated on the parser AST", top));
8542            }
8543            return new RowLimit(RowLimitKind.TOP, e.toString(), /*offset=*/ null);
8544        }
8545
8546        TOffsetClause offClause = select.getOffsetClause();
8547        TFetchFirstClause fetch = select.getFetchFirstClause();
8548        if (offClause != null) {
8549            // Oracle / SQL Server OFFSET m ROWS [FETCH NEXT n ROWS ONLY].
8550            // The optional FETCH NEXT counterpart populates
8551            // TFetchFirstClause when present.
8552            String offsetText = offClause.getSelectOffsetValue() != null
8553                    ? offClause.getSelectOffsetValue().toString()
8554                    : null;
8555            if (offsetText == null) {
8556                throw new SemanticIRBuildException(
8557                        Diagnostic.error(DiagnosticCode.ROW_LIMIT_COUNT_UNRESOLVED,
8558                        "row-limit clause OFFSET is present but the offset value "
8559                                + "is not populated on the parser AST", offClause));
8560            }
8561            String countText = null;
8562            if (fetch != null && fetch.getFetchValue() != null) {
8563                countText = fetch.getFetchValue().toString();
8564            }
8565            return new RowLimit(RowLimitKind.OFFSET_FETCH, countText, offsetText);
8566        }
8567        if (fetch != null) {
8568            if (fetch.getFetchValue() != null) {
8569                // Oracle / SQL Server FETCH FIRST/NEXT N ROWS ONLY
8570                // without OFFSET (TOffsetClause was null above).
8571                return new RowLimit(RowLimitKind.FETCH_FIRST,
8572                        fetch.getFetchValue().toString(), /*offset=*/ null);
8573            }
8574            // ANSI / DB2: TFetchFirstClause is non-null but fetchValue
8575            // is null because the grammar does not pass the count into
8576            // the node initializer. Reject so the gap is visible.
8577            throw new SemanticIRBuildException(
8578                    Diagnostic.error(DiagnosticCode.ROW_LIMIT_COUNT_UNRESOLVED,
8579                    "row-limit clause FETCH FIRST is present but the count "
8580                            + "expression is not populated on the parser AST "
8581                            + "(ANSI / DB2 grammar gap)", fetch));
8582        }
8583        return null;
8584    }
8585
8586    /**
8587     * Slice 72: build the OUTER set-op statement's row-limit. Same
8588     * decision tree as {@link #buildRowLimit} for SELECT-level routing
8589     * (PG/MySQL/SQLite/BigQuery/Snowflake/Redshift via
8590     * {@code TLimitClause}, plus Hive/Vertica/ANSI-DB2 defensives),
8591     * with an additional MSSQL-only fallback that reads the OFFSET /
8592     * FETCH FIRST clauses off the outer {@code TOrderBy} node.
8593     *
8594     * <p>Empirical AST shapes (probed against the current parser):
8595     * <ul>
8596     *   <li>PG / MySQL / SQLite / BigQuery / Snowflake / Redshift route
8597     *       set-op outer LIMIT / OFFSET / FETCH FIRST onto
8598     *       {@code setOp.getLimitClause()} — handled by the primary
8599     *       {@code buildRowLimit} path.</li>
8600     *   <li>MSSQL routes set-op outer {@code OFFSET m ROWS [FETCH NEXT
8601     *       n ROWS ONLY]} EXCLUSIVELY onto
8602     *       {@code setOp.getOrderbyClause().getOffsetClause()} /
8603     *       {@code .getFetchFirstClause()} — NOT duplicated onto the
8604     *       SELECT node (opposite of single-SELECT MSSQL where slice 71
8605     *       saw duplication onto both). The TOrderBy fallback below
8606     *       handles this.</li>
8607     *   <li>Oracle drops set-op outer OFFSET / FETCH from both SELECT
8608     *       and TOrderBy slots silently; nothing for slice 72 to
8609     *       emit. A future Oracle grammar fix can lift this.</li>
8610     * </ul>
8611     *
8612     * <p>The TOrderBy fallback is vendor-gated to MSSQL to avoid
8613     * over-admitting on unprobed dialects (per codex round-1 B1).
8614     * Kind mapping mirrors {@link #buildRowLimit}'s single-SELECT
8615     * decision tree:
8616     * <ul>
8617     *   <li>{@code TOffsetClause} + {@code TFetchFirstClause} both
8618     *       populated → {@code OFFSET_FETCH/count/offset}</li>
8619     *   <li>{@code TOffsetClause} only → {@code OFFSET_FETCH/null/offset}</li>
8620     *   <li>{@code TFetchFirstClause} only → {@code FETCH_FIRST/count/null}
8621     *       (unreachable via current MSSQL grammar which requires
8622     *       OFFSET before FETCH; retained as defensive routing-shape
8623     *       parity with single-SELECT)</li>
8624     *   <li>Defensive null-value rejects mirror single-SELECT
8625     *       {@link #buildRowLimit}: a present {@code TOffsetClause}
8626     *       with a null offset value throws
8627     *       {@code ROW_LIMIT_COUNT_UNRESOLVED}; a present bare
8628     *       {@code TFetchFirstClause} (no companion OFFSET) with a
8629     *       null fetch value throws the same code. When both clauses
8630     *       are present, only the offset slot must be populated; a
8631     *       null fetch value is silently treated as offset-only
8632     *       (matches the single-SELECT
8633     *       {@code TOffsetClause + TFetchFirstClause} branch in
8634     *       {@code buildRowLimit}).</li>
8635     * </ul>
8636     */
8637    private static RowLimit buildSetOpRowLimit(TSelectSqlStatement setOp) {
8638        // Primary path: SELECT-level routing. Covers PG/MySQL/SQLite/
8639        // BigQuery/Snowflake/Redshift via TLimitClause, plus inherited
8640        // Hive / Vertica / ANSI-DB2 defensives from buildRowLimit.
8641        RowLimit fromSelect = buildRowLimit(setOp);
8642        if (fromSelect != null) {
8643            return fromSelect;
8644        }
8645        // MSSQL-only TOrderBy fallback (codex B1 vendor gate).
8646        if (setOp.dbvendor != EDbVendor.dbvmssql) {
8647            return null;
8648        }
8649        TOrderBy orderBy = setOp.getOrderbyClause();
8650        if (orderBy == null) return null;
8651        TOffsetClause oc = orderBy.getOffsetClause();
8652        TFetchFirstClause fc = orderBy.getFetchFirstClause();
8653        if (oc == null && fc == null) return null;
8654
8655        String offsetText = (oc != null && oc.getSelectOffsetValue() != null)
8656                ? oc.getSelectOffsetValue().toString() : null;
8657        String countText = (fc != null && fc.getFetchValue() != null)
8658                ? fc.getFetchValue().toString() : null;
8659
8660        if (oc != null && fc != null) {
8661            if (offsetText == null) {
8662                // Mirrors single-SELECT buildRowLimit TOffsetClause path:
8663                // a present OFFSET clause must populate its value (the
8664                // FETCH NEXT counterpart is optional; null countText is
8665                // silently treated as offset-only).
8666                throw new SemanticIRBuildException(
8667                        Diagnostic.error(DiagnosticCode.ROW_LIMIT_COUNT_UNRESOLVED,
8668                        "MSSQL set-op outer OFFSET clause present on TOrderBy "
8669                                + "but offset value is not populated on the parser AST", orderBy));
8670            }
8671            return new RowLimit(RowLimitKind.OFFSET_FETCH, countText, offsetText);
8672        }
8673        if (oc != null) {
8674            if (offsetText == null) {
8675                throw new SemanticIRBuildException(
8676                        Diagnostic.error(DiagnosticCode.ROW_LIMIT_COUNT_UNRESOLVED,
8677                        "MSSQL set-op outer OFFSET clause present on TOrderBy "
8678                                + "but offset value is not populated on the parser AST", orderBy));
8679            }
8680            return new RowLimit(RowLimitKind.OFFSET_FETCH, /*count=*/ null, offsetText);
8681        }
8682        // fc only (oc == null). Defensive: not reachable via current
8683        // MSSQL grammar which requires OFFSET before FETCH NEXT.
8684        if (countText == null) {
8685            throw new SemanticIRBuildException(
8686                    Diagnostic.error(DiagnosticCode.ROW_LIMIT_COUNT_UNRESOLVED,
8687                    "MSSQL set-op outer FETCH FIRST clause present on TOrderBy "
8688                            + "but fetch value is not populated on the parser AST", orderBy));
8689        }
8690        return new RowLimit(RowLimitKind.FETCH_FIRST, countText, /*offset=*/ null);
8691    }
8692
8693    /**
8694     * Resolve a {@link TSelectSqlStatement}'s row-filter clause to the IR's
8695     * {@code distinct} flag. Mapping:
8696     *
8697     * <ul>
8698     *   <li>no clause / {@code urfNone} / {@code urfAll}: {@code false}</li>
8699     *   <li>{@code urfDistinct}: {@code true}</li>
8700     *   <li>{@code urfUnique}: {@code true} — Oracle treats
8701     *       {@code SELECT UNIQUE} as a deprecated synonym for
8702     *       {@code SELECT DISTINCT}; both produce the same row-set.</li>
8703     *   <li>{@code urfDistinctOn}: admits (slice 73). Returns
8704     *       {@code true} for the boolean flag; the
8705     *       {@code DISTINCT ON (cols)} partition keys are collected
8706     *       separately by
8707     *       {@link #buildDistinctOnColumnRefs(TSelectSqlStatement,
8708     *       NameBindingProvider)} so the column-ref collection runs
8709     *       AFTER {@code UsingScope} is installed (matching the timing
8710     *       of {@link #buildGroupByColumnRefs} and friends).</li>
8711     *   <li>{@code urfDistinctRow}, {@code urfNormalize}: rejected
8712     *       (vendor-specific; not yet a documented IR shape).</li>
8713     *   <li>null filter on a non-null {@code TSelectDistinct}, or a
8714     *       new enum value the switch hasn't seen yet: rejected, so a
8715     *       future {@code EUniqueRowFilterType} addition fails loudly
8716     *       rather than silently classifying as {@code distinct=false}.</li>
8717     * </ul>
8718     */
8719    private static boolean resolveDistinctFlag(TSelectSqlStatement select) {
8720        TSelectDistinct sd = select.getSelectDistinct();
8721        if (sd == null) return false;
8722        EUniqueRowFilterType urf = sd.getUniqueRowFilter();
8723        if (urf == null) {
8724            throw new SemanticIRBuildException(
8725                    Diagnostic.error(DiagnosticCode.SELECT_ROW_FILTER_NULL,
8726                    "SELECT row-filter is null; expected one of "
8727                            + "{none, all, distinct, unique}", select));
8728        }
8729        switch (urf) {
8730            case urfNone:
8731            case urfAll:
8732                return false;
8733            case urfDistinct:
8734            case urfUnique:        // Oracle deprecated synonym for DISTINCT
8735            case urfDistinctOn:    // slice 73: refs collected separately
8736                return true;
8737            case urfDistinctRow:
8738            case urfNormalize:
8739                throw new SemanticIRBuildException(
8740                        Diagnostic.error(DiagnosticCode.SELECT_ROW_FILTER_NOT_SUPPORTED,
8741                        "SELECT row-filter " + urf + " is not supported yet", select));
8742            default:
8743                throw new SemanticIRBuildException(
8744                        Diagnostic.error(DiagnosticCode.SELECT_ROW_FILTER_UNKNOWN,
8745                        "unknown SELECT row-filter " + urf, select));
8746        }
8747    }
8748
8749    /**
8750     * Slice 73: collect physical column references from a
8751     * {@code SELECT DISTINCT ON (cols)} expression list. Returns the
8752     * empty list for plain {@code DISTINCT}, {@code UNIQUE},
8753     * {@code ALL}, and the no-filter case. Only PostgreSQL and
8754     * Greenplum expose {@code urfDistinctOn} with a populated
8755     * {@link TSelectDistinct#getExpressionList()}; Oracle, MySQL, and
8756     * Redshift silently drop the {@code ON (...)} clause and parse the
8757     * SELECT as plain {@code DISTINCT}, so this helper returns
8758     * {@code []} for those vendors regardless of the surface SQL.
8759     *
8760     * <p>Mirrors {@link #buildGroupByColumnRefs}: subqueries and window
8761     * functions in the expression list are rejected BEFORE
8762     * {@link #collectColumnRefs} descends, so inner-scope refs cannot
8763     * leak into {@code distinctOnColumnRefs}. Compound expressions
8764     * ({@code a + b}, {@code CASE WHEN ...}) and aggregate arguments
8765     * ({@code COUNT(x)}) are descended into so the underlying column
8766     * refs are captured.
8767     */
8768    private static List<ColumnRef> buildDistinctOnColumnRefs(
8769            TSelectSqlStatement select, NameBindingProvider provider) {
8770        TSelectDistinct sd = select.getSelectDistinct();
8771        if (sd == null
8772                || sd.getUniqueRowFilter() != EUniqueRowFilterType.urfDistinctOn) {
8773            return new ArrayList<>();
8774        }
8775        TExpressionList el = sd.getExpressionList();
8776        if (el == null || el.size() == 0) {
8777            // PG grammar requires at least one expression after
8778            // DISTINCT ON (; this branch is defensive — surface a
8779            // clear diagnostic rather than silently emit [].
8780            throw new SemanticIRBuildException(
8781                    Diagnostic.error(DiagnosticCode.DISTINCT_ON_EMPTY_COLUMN_LIST,
8782                    "DISTINCT ON requires at least one expression but the "
8783                            + "AST exposes an empty list", sd));
8784        }
8785        // Iterate items explicitly so each per-expression reject
8786        // diagnostic points at the offending expression. Equivalent
8787        // to running containsAnySubquery / rejectWindowFunctionInScope
8788        // / collectColumnRefs on the whole list (TExpressionList
8789        // inherits TParseTreeNodeList.acceptChildren which already
8790        // iterates element children), but the loop body gives
8791        // clearer rejection sites and lets us dedup refs across
8792        // expressions in declaration order.
8793        List<ColumnRef> refs = new ArrayList<>();
8794        for (int i = 0; i < el.size(); i++) {
8795            TExpression expr = el.getExpression(i);
8796            if (containsAnySubqueryExpression(expr)) {
8797                throw new SemanticIRBuildException(
8798                        Diagnostic.error(DiagnosticCode.DISTINCT_ON_HAS_SUBQUERY_NOT_SUPPORTED,
8799                        "DISTINCT ON expression list contains a subquery; "
8800                                + "subqueries in DISTINCT ON are not supported yet", sd));
8801            }
8802            rejectWindowFunctionInScope(expr, "DISTINCT ON expression list");
8803            for (ColumnRef ref : collectColumnRefs(expr, provider)) {
8804                if (!refs.contains(ref)) refs.add(ref);
8805            }
8806        }
8807        return refs;
8808    }
8809
8810    private static boolean hasNoFromSource(TSelectSqlStatement select) {
8811        return select.joins == null || select.joins.size() == 0;
8812    }
8813
8814    private static boolean allResultColumnsAreConstantExpressions(TSelectSqlStatement select) {
8815        TResultColumnList rcl = select.getResultColumnList();
8816        if (rcl == null || rcl.size() == 0) return false;
8817        for (int i = 0; i < rcl.size(); i++) {
8818            TResultColumn rc = rcl.getResultColumn(i);
8819            if (rc == null || rc.getExpr() == null || !isConstantExpression(rc.getExpr())) {
8820                return false;
8821            }
8822        }
8823        return true;
8824    }
8825
8826    private static List<ColumnRef> buildGroupByColumnRefs(TSelectSqlStatement select, NameBindingProvider provider) {
8827        TGroupBy groupBy = select.getGroupByClause();
8828        if (groupBy == null || groupBy.getItems() == null || groupBy.getItems().size() == 0) {
8829            return new ArrayList<>();
8830        }
8831        TGroupByItemList items = groupBy.getItems();
8832        // Slice 61: reject subqueries in GROUP BY before collectColumnRefs
8833        // descends into them. Pre-slice-61, queries such as `SELECT 1
8834        // FROM employees GROUP BY (SELECT id FROM departments)` reached
8835        // the constant-only projection guard and failed there; with the
8836        // slice-61 lift the projection now builds and the GROUP BY
8837        // visitor would leak `departments.id` into groupByColumnRefs
8838        // even though `departments` is not in {@code relations}, breaking
8839        // the IR invariant that column refs reference an in-scope
8840        // relation. Mirrors the WHERE / HAVING / ORDER BY subquery
8841        // guards.
8842        if (containsAnySubquery(items)) {
8843            throw new SemanticIRBuildException(
8844                    Diagnostic.error(DiagnosticCode.GROUP_BY_HAS_SUBQUERY_NOT_SUPPORTED,
8845                    "GROUP BY clause contains a subquery; subqueries in "
8846                            + "GROUP BY are not supported yet", groupBy));
8847        }
8848        // Slice 13: reject window functions in GROUP BY before
8849        // collectColumnRefs descends.
8850        rejectWindowFunctionInScope(items, "GROUP BY clause");
8851        // Visitor-based collection ensures column refs in any nested
8852        // expression (e.g. GROUP BY date_trunc('day', t)) are captured.
8853        return collectColumnRefs(items, provider);
8854    }
8855
8856    /**
8857     * Collect physical column references from the {@code HAVING} clause.
8858     *
8859     * <p>HAVING is supported regardless of whether {@code GROUP BY} is
8860     * present: standard SQL allows {@code HAVING} without {@code GROUP BY}
8861     * (the whole result set is treated as a single group), and the parser
8862     * still attaches a {@link TGroupBy} node with empty
8863     * {@code getItems()} in that case. Both shapes flow through the same
8864     * collection path.
8865     *
8866     * <p>Per-shape rejections fire <i>before</i> {@link #collectColumnRefs}
8867     * so subquery / OVER children never enter the visitor and can't leak
8868     * inner-scope refs into {@code havingColumnRefs} (mirrors slice-9
8869     * ORDER BY guards):
8870     *
8871     * <ul>
8872     *   <li>Scalar subqueries ({@link EExpressionType#subquery_t}) and
8873     *       predicate subqueries ({@code EXISTS}, {@code IN (SELECT ...)},
8874     *       {@code ANY/ALL/SOME}) — checked via both expression-type and
8875     *       {@link TExpression#getSubQuery()}, deep-scanned through the
8876     *       whole HAVING expression subtree.</li>
8877     *   <li>Window functions ({@code OVER (...)}) — standard SQL forbids
8878     *       window functions in HAVING, but defense in depth: the
8879     *       deep-scan rejecter ensures PARTITION BY / OVER ORDER BY refs
8880     *       can't leak.</li>
8881     * </ul>
8882     *
8883     * <p>Aggregate functions in HAVING are <i>not</i> rejected — they're
8884     * the most common HAVING shape ({@code HAVING SUM(salary) > 1000}).
8885     * The visitor walks into the aggregate's argument list and captures
8886     * the underlying column ref ({@code salary}) the same way slice 6
8887     * does for projection-side aggregate args.
8888     */
8889    private static List<ColumnRef> buildHavingColumnRefs(TSelectSqlStatement select,
8890                                                        NameBindingProvider provider) {
8891        TGroupBy groupBy = select.getGroupByClause();
8892        if (groupBy == null) return new ArrayList<>();
8893        TExpression having = groupBy.getHavingClause();
8894        if (having == null) return new ArrayList<>();
8895        rejectHavingScalarSubquery(having);
8896        rejectHavingWindowFunction(having);
8897        return collectColumnRefs(having, provider);
8898    }
8899
8900    /**
8901     * Reject HAVING expressions that contain a subquery anywhere in the
8902     * subtree. Catches both:
8903     *
8904     * <ul>
8905     *   <li>Scalar subqueries ({@link EExpressionType#subquery_t}) —
8906     *       e.g. {@code HAVING (SELECT MAX(salary) FROM employees) > 0}.</li>
8907     *   <li>Predicate subqueries ({@code EXISTS}, {@code IN (SELECT ...)},
8908     *       {@code ANY/ALL/SOME (SELECT ...)}) — these don't appear as
8909     *       {@code subquery_t} expression nodes but carry a non-null
8910     *       {@link TExpression#getSubQuery()}, e.g.
8911     *       {@code HAVING EXISTS (SELECT 1 FROM ...)} or
8912     *       {@code HAVING d.id IN (SELECT id FROM ...)}.</li>
8913     * </ul>
8914     *
8915     * <p>Mirrors {@link #rejectOrderByScalarSubquery}: top-level fast
8916     * path + visitor deep-scan over {@link TExpression#acceptChildren}.
8917     * The deep scan is required for nested cases like
8918     * {@code HAVING flag = 1 AND EXISTS (SELECT ...)} or
8919     * {@code HAVING CASE WHEN d.id IN (SELECT ...) THEN 1 ELSE 0 END > 0}.
8920     */
8921    private static void rejectHavingScalarSubquery(TExpression having) {
8922        if (having.getExpressionType() == EExpressionType.subquery_t
8923                || having.getSubQuery() != null) {
8924            throw new SemanticIRBuildException(
8925                    Diagnostic.error(DiagnosticCode.HAVING_SUBQUERY_NOT_SUPPORTED,
8926                    "HAVING subquery '" + having + "' is not supported yet "
8927                            + "(subqueries in HAVING would leak inner column refs)", having));
8928        }
8929        final boolean[] found = {false};
8930        having.acceptChildren(new TParseTreeVisitor() {
8931            @Override
8932            public void preVisit(TExpression e) {
8933                if (found[0]) return;
8934                if (e.getExpressionType() == EExpressionType.subquery_t
8935                        || e.getSubQuery() != null) {
8936                    found[0] = true;
8937                }
8938            }
8939        });
8940        if (found[0]) {
8941            throw new SemanticIRBuildException(
8942                    Diagnostic.error(DiagnosticCode.HAVING_HAS_SUBQUERY_NOT_SUPPORTED,
8943                    "HAVING expression '" + having + "' contains a subquery "
8944                            + "(scalar, EXISTS, IN, or ANY/ALL/SOME); not supported yet", having));
8945        }
8946    }
8947
8948    /**
8949     * Reject HAVING expressions that contain a window function. Standard
8950     * SQL forbids window functions in HAVING (analytic functions are
8951     * computed after HAVING), but defense in depth: the visitor would
8952     * descend into {@code OVER (PARTITION BY ... ORDER BY ...)} and the
8953     * inner-scope refs would otherwise leak into {@code havingColumnRefs}.
8954     * Mirrors the projection-side {@link #rejectWindowFunctions} and the
8955     * ORDER BY-side {@link #rejectOrderByWindowFunction}.
8956     */
8957    private static void rejectHavingWindowFunction(TExpression having) {
8958        final boolean[] found = {false};
8959        having.acceptChildren(new TParseTreeVisitor() {
8960            @Override
8961            public void preVisit(TFunctionCall fn) {
8962                if (found[0]) return;
8963                if (fn.getWindowDef() != null) found[0] = true;
8964            }
8965        });
8966        if (!found[0] && having.getExpressionType() == EExpressionType.function_t) {
8967            TFunctionCall fn = having.getFunctionCall();
8968            if (fn != null && fn.getWindowDef() != null) found[0] = true;
8969        }
8970        if (found[0]) {
8971            throw new SemanticIRBuildException(
8972                    Diagnostic.error(DiagnosticCode.HAVING_WINDOW_FUNCTION_NOT_SUPPORTED,
8973                    "HAVING window function '" + having + "' is not supported yet "
8974                            + "(window OVER (...) refs would leak into havingColumnRefs)", having));
8975        }
8976    }
8977
8978    /**
8979     * Collect physical column references from {@code ORDER BY} sort keys.
8980     *
     * <p>Per-item handling resolves sort keys that name an output
     * column and rejects shapes that would otherwise vanish silently
     * into an empty ref list, leak inner-scope refs, or misrepresent
     * presentation as a dependency:
     *
     * <ul>
     *   <li>Ordinal references ({@code ORDER BY 1}) — the sort key is a
     *       {@link EExpressionType#simple_constant_t} whose meaning is
     *       "first projected column", which depends on the SELECT list,
     *       not on a base column. Since slice 68 a positive-integer
     *       ordinal is resolved by {@link #tryResolveOrderByOrdinal} to
     *       the sources of the output column at that position; an
     *       out-of-range ordinal, or one whose output column has no
     *       sources, is rejected.</li>
     *   <li>Constant sort keys other than ordinals ({@code ORDER BY 'x'},
     *       and the compound {@code ORDER BY (1)} / {@code ORDER BY 1+0}
     *       caught by the generic no-physical-column-refs check).</li>
     *   <li>Projection-alias references ({@code ORDER BY x} where
     *       {@code x} is a SELECT alias) — {@link TOrderByItem#doParse}
     *       retypes the operand to {@link TObjectName#ttobjColumnAlias},
     *       which lowers {@link TObjectName#getDbObjectType()} to
     *       {@link EDbObjectType#column_alias}. Since slice 69 a
     *       top-level bare alias is resolved by
     *       {@link #tryResolveOrderByProjectionAlias} to the sources of
     *       the matching output column (rejected when that column has
     *       no sources). Alias nodes nested inside expressions are
     *       still rejected by the deep scan in
     *       {@link #rejectOrderByAliasReference}; without that
     *       rejection the visitor would skip them and the IR would
     *       lose the dependency entirely.</li>
9002     *   <li>Subqueries in sort keys — scalar
9003     *       ({@link EExpressionType#subquery_t}) and predicate
9004     *       ({@code EXISTS}, {@code IN (SELECT ...)}, {@code ANY/ALL/SOME})
9005     *       — would otherwise leak inner-scope column refs into the
9006     *       outer statement's {@code orderByColumnRefs}.</li>
9007     *   <li>Window functions in sort keys ({@code ORDER BY ROW_NUMBER()
9008     *       OVER (...)}) — the OVER clause descends through the visitor
9009     *       and would leak its PARTITION BY / ORDER BY refs.</li>
9010     * </ul>
9011     *
     * <p>Sub-clauses that change row-set semantics are also rejected
     * here: Oracle {@code ORDER SIBLINGS BY} (hierarchical, not yet
     * modelled) and Teradata {@code RESET WHEN} (window-style restart).
     * The {@link TOrderBy}-level {@code FETCH FIRST}/{@code OFFSET}
     * fields are no longer rejected here (slice 71): row limits are
     * admitted at the SELECT level via {@code buildRowLimit}
     * ({@code TSelectSqlNode.setOrderbyClause()} copies in-clause OFFSET/
     * FETCH onto the SELECT node), so any duplicate left on the
     * {@link TOrderBy} node is simply ignored by this method.
9020     *
9021     * <p>For everything else (qualified column refs, expressions like
9022     * {@code UPPER(name)}), {@link #collectColumnRefs} runs over each
9023     * sort key and aggregates the physical column refs. A per-item
9024     * empty-refs check catches anything that slipped past the explicit
9025     * shape rejections (e.g. {@code ORDER BY (1)},
9026     * {@code ORDER BY 1 + 0}). Sort direction ({@code ASC}/{@code DESC})
9027     * and null placement ({@code NULLS FIRST}/{@code NULLS LAST}) are
9028     * presentation metadata and are not modelled.
9029     */
9030    private static List<ColumnRef> buildOrderByColumnRefs(TSelectSqlStatement select,
9031                                                          NameBindingProvider provider,
9032                                                          List<OutputColumn> outputColumns) {
9033        TOrderBy orderBy = select.getOrderbyClause();
9034        if (orderBy == null) {
9035            return new ArrayList<>();
9036        }
9037        if (orderBy.isSiblings()) {
9038            throw new SemanticIRBuildException(
9039                    Diagnostic.error(DiagnosticCode.ORDER_SIBLINGS_BY_NOT_SUPPORTED,
9040                    "ORDER SIBLINGS BY is not supported yet "
9041                            + "(Oracle hierarchical ordering)", orderBy));
9042        }
9043        if (orderBy.getResetWhenCondition() != null) {
9044            throw new SemanticIRBuildException(
9045                    Diagnostic.error(DiagnosticCode.ORDER_BY_RESET_WHEN_NOT_SUPPORTED,
9046                    "ORDER BY ... RESET WHEN is not supported yet "
9047                            + "(Teradata window-style restart)", orderBy));
9048        }
9049        // Slice 71: the in-clause OFFSET/FETCH on TOrderBy is no longer
9050        // rejected. MSSQL parsers duplicate OFFSET/FETCH onto BOTH the
9051        // SELECT node AND the TOrderBy node; slice 71 admits at the
9052        // SELECT level via buildRowLimit, so the TOrderBy duplicates
9053        // are simply ignored here. Oracle parsers populate only the
9054        // SELECT-level fields, so the TOrderBy fields are typically
9055        // null there.
9056        TOrderByItemList items = orderBy.getItems();
9057        if (items == null || items.size() == 0) {
9058            return new ArrayList<>();
9059        }
9060        // Validate + collect per item so a sort key contributing zero
9061        // column refs (e.g. constant arithmetic, parenthesised constant)
9062        // is rejected with an item-specific message instead of silently
9063        // disappearing.
9064        LinkedHashSet<ColumnRef> all = new LinkedHashSet<>();
9065        for (int i = 0; i < items.size(); i++) {
9066            TOrderByItem item = items.getOrderByItem(i);
9067            if (item == null) continue;
9068            TExpression sortKey = item.getSortKey();
9069            if (sortKey == null) continue;
9070            // Slice 68: positive-integer ordinals admit. The helper:
9071            //   - returns null when sortKey is not a positive-integer
9072            //     literal (caller falls through to the existing
9073            //     constant / alias / subquery / window rejecters and the
9074            //     standard ref collection);
9075            //   - returns the matching output column's sources list when
9076            //     sortKey IS a positive-integer literal in range;
9077            //   - throws ORDER_BY_ORDINAL_OUT_OF_RANGE when the ordinal
9078            //     is 0 or exceeds the output column count.
9079            // The sourceless-output case (e.g. SELECT 1 FROM t ORDER BY 1
9080            // or SELECT COUNT(*) FROM t ORDER BY 1) returns an empty
9081            // list and falls through to the per-item empty-refs guard
9082            // below, mirroring the existing ORDER BY COUNT(*) /
9083            // ORDER BY 1 + 0 rejection.
9084            List<ColumnRef> ordinalSources = tryResolveOrderByOrdinal(sortKey, outputColumns);
9085            if (ordinalSources != null) {
9086                if (ordinalSources.isEmpty()) {
9087                    throw new SemanticIRBuildException(
9088                            Diagnostic.error(DiagnosticCode.ORDER_BY_NO_PHYSICAL_COLUMN_REFS,
9089                            "ORDER BY ordinal '" + sortKey
9090                                    + "' resolves to output column with no physical column references "
9091                                    + "(constant or sourceless aggregate output)", sortKey));
9092                }
9093                all.addAll(ordinalSources);
9094                continue;
9095            }
9096            // Slice 69: top-level projection-alias references admit. The
9097            // helper returns null for non-alias shapes; an empty list
9098            // (alias of a constant / sourceless aggregate) falls through
9099            // to ORDER_BY_NO_PHYSICAL_COLUMN_REFS, mirroring the slice-68
9100            // sourceless-ordinal handling. Deep-scan alias references
9101            // (e.g. ORDER BY UPPER(<alias>)) are still caught by
9102            // rejectOrderByAliasReference below.
9103            List<ColumnRef> aliasSources = tryResolveOrderByProjectionAlias(sortKey, outputColumns);
9104            if (aliasSources != null) {
9105                if (aliasSources.isEmpty()) {
9106                    throw new SemanticIRBuildException(
9107                            Diagnostic.error(DiagnosticCode.ORDER_BY_NO_PHYSICAL_COLUMN_REFS,
9108                            "ORDER BY projection alias '" + sortKey
9109                                    + "' resolves to output column with no physical column references "
9110                                    + "(constant or sourceless aggregate output)", sortKey));
9111                }
9112                all.addAll(aliasSources);
9113                continue;
9114            }
9115            // Slice 68: non-ordinal constants stay rejected. The original
9116            // helper is preserved for the set-op outer path (which keeps
9117            // its ordinal/constant rejection until slice 72).
9118            rejectOrderByNonOrdinalConstant(sortKey);
9119            // Slice 69: top-level bare alias references are consumed by
9120            // tryResolveOrderByProjectionAlias above; this helper now only
9121            // catches DEEP alias references nested inside compound
9122            // expressions (e.g. ORDER BY UPPER(<alias>)).
9123            rejectOrderByAliasReference(sortKey);
9124            // Reject scalar subqueries and window functions BEFORE
9125            // collecting refs. The visitor descends into both, so without
9126            // these guards `ORDER BY (SELECT MAX(salary) FROM employees)`
9127            // and `ORDER BY ROW_NUMBER() OVER (ORDER BY salary)` would
9128            // leak inner refs into orderByColumnRefs as if the outer
9129            // statement physically depended on them.
9130            rejectOrderByScalarSubquery(sortKey);
9131            rejectOrderByWindowFunction(sortKey);
9132            List<ColumnRef> itemRefs = collectColumnRefs(item, provider);
9133            if (itemRefs.isEmpty()) {
9134                // Anything else that produces no physical column refs:
9135                // ORDER BY (1), ORDER BY 1+0, ORDER BY NULL, ORDER BY
9136                // CASE WHEN 1=1 THEN 'a' END, etc. Reject so the IR
9137                // doesn't silently emit empty refs.
9138                throw new SemanticIRBuildException(
9139                        Diagnostic.error(DiagnosticCode.ORDER_BY_NO_PHYSICAL_COLUMN_REFS,
9140                        "ORDER BY sort key '" + sortKey + "' has no physical column references "
9141                                + "(constant or non-column expressions are not supported yet)", sortKey));
9142            }
9143            all.addAll(itemRefs);
9144        }
9145        return new ArrayList<>(all);
9146    }
9147
9148    /**
9149     * Slice 68: resolve a positive-integer ORDER BY ordinal to the matching
9150     * output column's sources. Returns:
9151     *
9152     * <ul>
9153     *   <li>{@code null} if {@code sortKey} is not a positive-integer
9154     *       literal (caller continues with the constant / alias / subquery
9155     *       / window rejecters and the standard ref collection);</li>
9156     *   <li>a {@link List} of {@link ColumnRef}s — the source list of the
9157     *       output column at position {@code v - 1} (1-based ordinals);</li>
9158     *   <li>throws {@link SemanticIRBuildException} with
9159     *       {@code ORDER_BY_ORDINAL_OUT_OF_RANGE} when {@code v} is 0 or
9160     *       exceeds the output column count.</li>
9161     * </ul>
9162     *
9163     * <p>{@code sortKey.getExpressionType() == simple_constant_t} for bare
9164     * positive integers; negative integers parse as a {@code unary_minus_t}
9165     * over a {@code simple_constant_t} and are not handled here. Compound
9166     * constant expressions ({@code ORDER BY 1 + 0}, {@code ORDER BY (1)})
9167     * are {@code arithmetic_*_t} / {@code parenthesis_t} respectively and
9168     * fall through to the per-item empty-refs guard.
9169     *
9170     * <p>The empty-list case (output column resolved with
9171     * {@link OutputColumn#getSources()} empty — constant projections,
9172     * {@code COUNT(*)}, sourceless aggregates) is returned to the caller
9173     * which fires {@code ORDER_BY_NO_PHYSICAL_COLUMN_REFS}. Slice 68
9174     * boundary.
9175     *
9176     * <p>Sort direction (ASC/DESC) and null placement (NULLS FIRST/LAST)
9177     * are presentation metadata on {@link TOrderByItem}, not on the sort
9178     * key expression; this helper doesn't inspect them (slice 9 decision).
9179     */
9180    private static List<ColumnRef> tryResolveOrderByOrdinal(TExpression sortKey,
9181                                                             List<OutputColumn> outputs) {
9182        if (sortKey.getExpressionType() != EExpressionType.simple_constant_t) {
9183            return null;
9184        }
9185        String txt = sortKey.toString();
9186        if (txt == null || !txt.matches("\\d+")) {
9187            return null;
9188        }
9189        long v;
9190        try {
9191            v = Long.parseLong(txt);
9192        } catch (NumberFormatException e) {
9193            // Very-long-digit text overflows long; definitely out of range.
9194            throw new SemanticIRBuildException(
9195                    Diagnostic.error(DiagnosticCode.ORDER_BY_ORDINAL_OUT_OF_RANGE,
9196                    "ORDER BY ordinal '" + sortKey + "' is out of range "
9197                            + "(must be between 1 and " + outputs.size() + ")", sortKey));
9198        }
9199        if (v < 1 || v > outputs.size()) {
9200            throw new SemanticIRBuildException(
9201                    Diagnostic.error(DiagnosticCode.ORDER_BY_ORDINAL_OUT_OF_RANGE,
9202                    "ORDER BY ordinal '" + sortKey + "' is out of range "
9203                            + "(must be between 1 and " + outputs.size() + ")", sortKey));
9204        }
9205        return outputs.get((int) v - 1).getSources();
9206    }
9207
9208    /**
9209     * Slice 69: resolve a top-level bare projection-alias ORDER BY sort
9210     * key to the matching output column's sources. Returns:
9211     *
9212     * <ul>
9213     *   <li>{@code null} if {@code sortKey} is not a top-level bare
9214     *       {@code simple_object_name_t} whose object operand has
9215     *       {@code dbObjectType == EDbObjectType.column_alias} (caller
9216     *       continues with {@link #rejectOrderByAliasReference} for the
9217     *       deep-scan case and the standard column-ref collection);</li>
9218     *   <li>a {@link List} of {@link ColumnRef}s — the matching output's
9219     *       source list (which may be empty when the aliased projection
9220     *       is a constant or sourceless aggregate; the caller fires
9221     *       {@code ORDER_BY_NO_PHYSICAL_COLUMN_REFS} in that case);</li>
9222     *   <li>throws {@link SemanticIRBuildException} with
9223     *       {@code ORDER_BY_PROJECTION_ALIAS_NOT_SUPPORTED} when the
9224     *       parser retyped the operand to {@code column_alias} but no
9225     *       matching output exists by case-insensitive name (defensive;
9226     *       theoretically unreachable for parsable SQL).</li>
9227     * </ul>
9228     *
9229     * <p>Match strategy: case-insensitive ({@link Locale#ROOT}) on
9230     * {@link OutputColumn#getName()}, returning the FIRST match. This
9231     * mirrors the set-op outer alias matcher in
9232     * {@link #processSetOpOrderByObjectName} (which uses the identical
9233     * {@code toLowerCase(Locale.ROOT)} pattern and {@code break}s on
9234     * first match) and follows MySQL / PostgreSQL ORDER BY alias
9235     * resolution semantics. Duplicate aliases (e.g. {@code SELECT a AS x,
9236     * b AS x FROM t ORDER BY x}) resolve to the leftmost matching
9237     * projection; this is the documented slice-69 boundary.
9238     *
9239     * <p>The set-op outer alias path
9240     * ({@link #buildSetOpOuterOrderByColumnRefs} →
9241     * {@link #processSetOpOrderByObjectName}) was already admitted by
9242     * slice 21 and is independent of this helper.
9243     *
9244     * <p>Deep-scan alias references inside compound expressions
9245     * ({@code ORDER BY UPPER(<alias>)}) are NOT handled here — the
9246     * parser only retypes the top-level operand to {@code column_alias};
9247     * inside nested expressions the alias may or may not be retyped by
9248     * resolver2 depending on schema heuristics. The slice-9
9249     * {@code orderByNestedAliasReferenceIsHandledSafely} contract is
9250     * preserved: deep alias refs are caught by
9251     * {@link #rejectOrderByAliasReference} with
9252     * {@code ORDER_BY_UNSUPPORTED_SORT_KEY_SHAPE} or by a binding
9253     * failure.
9254     */
9255    private static List<ColumnRef> tryResolveOrderByProjectionAlias(
9256            TExpression sortKey, List<OutputColumn> outputs) {
9257        if (sortKey.getExpressionType() != EExpressionType.simple_object_name_t) {
9258            return null;
9259        }
9260        TObjectName op = sortKey.getObjectOperand();
9261        if (op == null || op.getDbObjectType() != EDbObjectType.column_alias) {
9262            return null;
9263        }
9264        String name = op.toString();
9265        if (name == null || name.isEmpty()) {
9266            return null;
9267        }
9268        String key = name.toLowerCase(Locale.ROOT);
9269        for (OutputColumn oc : outputs) {
9270            String outName = oc.getName();
9271            if (outName != null && outName.toLowerCase(Locale.ROOT).equals(key)) {
9272                return oc.getSources();
9273            }
9274        }
9275        throw new SemanticIRBuildException(
9276                Diagnostic.error(DiagnosticCode.ORDER_BY_PROJECTION_ALIAS_NOT_SUPPORTED,
9277                "ORDER BY projection alias '" + sortKey
9278                        + "' does not match any output column "
9279                        + "(defensive — parser retyped to column_alias "
9280                        + "but no output by that name)", sortKey));
9281    }
9282
9283    /**
9284     * Slice 68: reject ORDER BY sort keys that are constants but NOT
9285     * positive-integer ordinals. The positive-integer ordinal case is
9286     * admitted separately by {@link #tryResolveOrderByOrdinal} which maps
9287     * the ordinal to the matching output column's sources. This helper
9288     * handles the remaining constant shapes ({@code ORDER BY 'x'},
9289     * {@code ORDER BY 3.14}) — none of which reference an output position
9290     * and so contribute no column dependency.
9291     *
9292     * <p>The set-op outer ORDER BY path
9293     * ({@link #buildSetOpOuterOrderByColumnRefs}) keeps the original
9294     * {@link #rejectOrderByOrdinalOrConstant} helper so ordinals at that
9295     * scope stay rejected (slice 68 lifts only the single-SELECT case;
9296     * slice 72 will lift set-op outer).
9297     */
9298    private static void rejectOrderByNonOrdinalConstant(TExpression sortKey) {
9299        if (sortKey.getExpressionType() != EExpressionType.simple_constant_t) {
9300            return;
9301        }
9302        String txt = sortKey.toString();
9303        boolean looksOrdinal = txt != null && txt.matches("\\d+");
9304        if (looksOrdinal) {
9305            // Admitted by tryResolveOrderByOrdinal; this helper is a no-op
9306            // for positive-integer ordinals.
9307            return;
9308        }
9309        throw new SemanticIRBuildException(
9310                Diagnostic.error(DiagnosticCode.ORDER_BY_CONSTANT_NOT_SUPPORTED,
9311                "ORDER BY constant '" + sortKey + "' is not supported yet "
9312                        + "(constant sort keys add no column dependency)", sortKey));
9313    }
9314
9315    /**
9316     * Reject ORDER BY sort keys that contain a subquery anywhere in the
9317     * subtree. Catches both:
9318     *
9319     * <ul>
9320     *   <li>Scalar subqueries ({@link EExpressionType#subquery_t}) —
9321     *       e.g. {@code ORDER BY (SELECT MAX(salary) FROM employees)}.</li>
9322     *   <li>Predicate subqueries ({@code EXISTS}, {@code IN (SELECT ...)},
9323     *       {@code ANY/ALL/SOME (SELECT ...)}) — these don't appear as a
9324     *       {@code subquery_t} expression but carry a non-null
9325     *       {@link TExpression#getSubQuery()}, e.g.
9326     *       {@code ORDER BY CASE WHEN EXISTS (SELECT 1 FROM t WHERE ...)
9327     *       THEN 0 ELSE 1 END}.</li>
9328     * </ul>
9329     *
9330     * <p>The visitor descends into the subquery body, so without an
9331     * explicit reject the inner-scope refs would leak into the outer
9332     * statement's {@code orderByColumnRefs}. The same restriction is
9333     * applied to scalar subqueries in projection (see
9334     * {@link #buildOutputColumns}).
9335     */
9336    private static void rejectOrderByScalarSubquery(TExpression sortKey) {
9337        // Top-level fast path: scalar-subquery message for the common case.
9338        if (sortKey.getExpressionType() == EExpressionType.subquery_t
9339                || sortKey.getSubQuery() != null) {
9340            throw new SemanticIRBuildException(
9341                    Diagnostic.error(DiagnosticCode.ORDER_BY_SUBQUERY_NOT_SUPPORTED,
9342                    "ORDER BY subquery '" + sortKey + "' is not supported yet "
9343                            + "(subqueries in sort keys would leak inner column refs)", sortKey));
9344        }
9345        // Deep scan: any nested expression that owns a subquery (scalar,
9346        // EXISTS, IN (SELECT ...), ANY/ALL/SOME) makes the sort key
9347        // out of scope.
9348        final boolean[] found = {false};
9349        sortKey.acceptChildren(new TParseTreeVisitor() {
9350            @Override
9351            public void preVisit(TExpression e) {
9352                if (found[0]) return;
9353                if (e.getExpressionType() == EExpressionType.subquery_t
9354                        || e.getSubQuery() != null) {
9355                    found[0] = true;
9356                }
9357            }
9358        });
9359        if (found[0]) {
9360            throw new SemanticIRBuildException(
9361                    Diagnostic.error(DiagnosticCode.ORDER_BY_HAS_SUBQUERY_NOT_SUPPORTED,
9362                    "ORDER BY sort key '" + sortKey + "' contains a subquery "
9363                            + "(scalar, EXISTS, IN, or ANY/ALL/SOME); not supported yet", sortKey));
9364        }
9365    }
9366
9367    /**
9368     * Reject ORDER BY sort keys that contain a window function. Window
9369     * functions descend through {@link TFunctionCall#acceptChildren()} so
9370     * their PARTITION BY / ORDER BY column refs would otherwise leak into
9371     * the outer statement's {@code orderByColumnRefs}. Mirrors the
9372     * projection-side {@link #rejectWindowFunctions}, but wired through
9373     * the ORDER BY item-walk instead of the result-column list.
9374     */
9375    private static void rejectOrderByWindowFunction(TExpression sortKey) {
9376        final boolean[] found = {false};
9377        sortKey.acceptChildren(new TParseTreeVisitor() {
9378            @Override
9379            public void preVisit(TFunctionCall fn) {
9380                if (found[0]) return;
9381                if (fn.getWindowDef() != null) found[0] = true;
9382            }
9383        });
9384        if (!found[0] && sortKey.getExpressionType() == EExpressionType.function_t) {
9385            TFunctionCall fn = sortKey.getFunctionCall();
9386            if (fn != null && fn.getWindowDef() != null) found[0] = true;
9387        }
9388        if (found[0]) {
9389            throw new SemanticIRBuildException(
9390                    Diagnostic.error(DiagnosticCode.ORDER_BY_WINDOW_FUNCTION_NOT_SUPPORTED,
9391                    "ORDER BY window function '" + sortKey + "' is not supported yet "
9392                            + "(window OVER (...) refs would leak into orderByColumnRefs)", sortKey));
9393        }
9394    }
9395
9396    /**
9397     * Reject ORDER BY sort keys that are bare constants. Splits the
9398     * message between integer ordinals (which reference the SELECT
9399     * position) and other constants (which add no column dependency). The
9400     * generic no-physical-column-refs check in
9401     * {@link #buildOrderByColumnRefs} catches compound cases like
9402     * {@code ORDER BY (1)} or {@code ORDER BY 1 + 0}.
9403     *
9404     * <p><b>Slice 68:</b> the single-SELECT call site no longer uses this
9405     * helper because positive-integer ordinals now resolve to the matching
9406     * output column's sources (see {@link #tryResolveOrderByOrdinal}).
9407     * This helper remains for {@link #buildSetOpOuterOrderByColumnRefs},
9408     * where ordinal lifting is deferred to slice 72 (set-op outer
9409     * ORDER BY needs output-position references against the set-op output
9410     * row type, not the single-SELECT output column list).
9411     */
9412    private static void rejectOrderByOrdinalOrConstant(TExpression sortKey) {
9413        if (sortKey.getExpressionType() != EExpressionType.simple_constant_t) {
9414            return;
9415        }
9416        String txt = sortKey.toString();
9417        boolean looksOrdinal = txt != null && txt.matches("\\d+");
9418        if (looksOrdinal) {
9419            throw new SemanticIRBuildException(
9420                    Diagnostic.error(DiagnosticCode.ORDER_BY_ORDINAL_NOT_SUPPORTED,
9421                    "ORDER BY ordinal '" + sortKey + "' is not supported yet "
9422                            + "(reference the column or expression directly)", sortKey));
9423        }
9424        throw new SemanticIRBuildException(
9425                Diagnostic.error(DiagnosticCode.ORDER_BY_CONSTANT_NOT_SUPPORTED,
9426                "ORDER BY constant '" + sortKey + "' is not supported yet "
9427                        + "(constant sort keys add no column dependency)", sortKey));
9428    }
9429
9430    /**
9431     * Reject ORDER BY sort keys that contain a projection-alias reference
9432     * NESTED inside a compound expression (e.g.
9433     * {@code ORDER BY UPPER(<alias>)}). The visitor in
9434     * {@link #collectColumnRefs} skips column-alias nodes, so without an
9435     * explicit reject the IR would emit no column refs for them.
9436     *
9437     * <p><b>Slice 69:</b> the top-level bare-alias case (e.g.
9438     * {@code ORDER BY <alias>}) is now consumed by
9439     * {@link #tryResolveOrderByProjectionAlias} BEFORE this helper runs.
9440     * Only the deep-scan branch remains here; the top-level fast-path
9441     * was removed because it became unreachable.
9442     *
9443     * <p>{@link TOrderByItem#doParse} only retypes the top-level operand
9444     * to {@link TObjectName#ttobjColumnAlias} → dbObjectType
9445     * {@link EDbObjectType#column_alias}; inside nested expressions the
9446     * alias may or may not be retyped by resolver2 depending on schema
9447     * heuristics. Slice 9's
9448     * {@code orderByNestedAliasReferenceIsHandledSafely} documents the
9449     * three acceptable outcomes for deep aliases (reject by binding
9450     * failure, reject by this deep scan, or accept with a real column
9451     * dependency captured).
9452     */
9453    private static void rejectOrderByAliasReference(TExpression sortKey) {
9454        // Deep scan: an alias node nested inside an expression
9455        // (e.g. ORDER BY UPPER(x) where x is an alias) would otherwise be
9456        // silently dropped by the column-only visitor. The top-level
9457        // bare-alias case is consumed earlier by
9458        // tryResolveOrderByProjectionAlias (slice 69 lift).
9459        final boolean[] foundAlias = {false};
9460        final String[] aliasName = {null};
9461        sortKey.acceptChildren(new TParseTreeVisitor() {
9462            @Override
9463            public void preVisit(TObjectName node) {
9464                if (foundAlias[0]) return;
9465                if (node.getDbObjectType() == EDbObjectType.column_alias) {
9466                    foundAlias[0] = true;
9467                    aliasName[0] = node.toString();
9468                }
9469            }
9470        });
9471        if (foundAlias[0]) {
9472            throw new SemanticIRBuildException(
9473                    Diagnostic.error(DiagnosticCode.ORDER_BY_UNSUPPORTED_SORT_KEY_SHAPE,
9474                    "ORDER BY sort key '" + sortKey
9475                            + "' contains a projection alias reference '"
9476                            + aliasName[0] + "'; not supported yet "
9477                            + "(reference the underlying column directly)", sortKey));
9478        }
9479    }
9480
9481    /**
9482     * Reject SELECT shapes outside current builder scope. The
9483     * {@code skipCteListCheck} flag is true only for the outer SELECT of a
9484     * WITH-bearing query whose CTEs were already extracted by
9485     * {@link #build}; nested WITH inside a CTE body is still rejected.
9486     */
9487    private static void rejectUnsupportedShape(TSelectSqlStatement select, boolean skipCteListCheck) {
9488        // Slice 12: top-level set-ops and CTE-body set-ops are dispatched
9489        // by build() to buildSetOpProgram BEFORE buildSelectStatement is
9490        // called. This rejection still fires when buildSelectStatement
9491        // is called from a recursive context (FROM-subquery / scalar-body
9492        // extraction) where the inner SELECT happens to be a set-op —
9493        // those nested cases remain out of scope.
9494        if (select.getSetOperatorType() != null && select.getSetOperatorType() != ESetOperatorType.none) {
9495            throw new SemanticIRBuildException(Diagnostic.error(DiagnosticCode.SET_OPERATION_NOT_SUPPORTED_IN_CONTEXT, "set operations (UNION/INTERSECT/MINUS) are not supported in this context yet", select));
9496        }
9497        if (!skipCteListCheck && select.getCteList() != null && select.getCteList().size() > 0) {
9498            throw new SemanticIRBuildException(
9499                    Diagnostic.error(DiagnosticCode.NESTED_WITH_NOT_SUPPORTED,
9500                    "nested WITH/CTE inside a CTE body or subquery is not supported yet", select));
9501        }
9502        // DISTINCT / UNIQUE / ALL handling is done in resolveDistinctFlag()
9503        // (called from buildSelectStatement). Only rejected row-filter shapes
9504        // bubble up as a SemanticIRBuildException; the rest become the
9505        // StatementGraph.distinct flag.
9506        // Slice 6 lifted GROUP BY; slice 10 lifted HAVING. The HAVING
9507        // expression itself (and the per-shape rejections for subqueries
9508        // and window functions inside it) are handled in
9509        // buildHavingColumnRefs so the rejection messages can mention the
9510        // specific shape.
9511        // Slices 70 and 71: all single-SELECT row-limit admit/reject
9512        // decisions live in buildRowLimit(select). rejectUnsupportedShape
9513        // no longer carries any row-limit logic. Set-op outer row-limits
9514        // remain handled by rejectSetOpRowLimit (slice 72 lifts).
9515        // Slice 13 codex impl-review round-2 MUST 3: reject Teradata
9516        // QUALIFY clause. QUALIFY filters rows based on window-function
9517        // results (e.g. `QUALIFY ROW_NUMBER() OVER (...) = 1`); without
9518        // this guard a window-function projection paired with a QUALIFY
9519        // clause would silently ignore the row-filter and produce an
9520        // incomplete IR. Lifting requires modeling row-filter semantics
9521        // similar to slice-9's row-limit canonical-model exclusion.
9522        if (select.getQualifyClause() != null) {
9523            throw new SemanticIRBuildException(
9524                    Diagnostic.error(DiagnosticCode.QUALIFY_NOT_SUPPORTED,
9525                    "QUALIFY clause is not supported yet; row-filter on "
9526                            + "window-function results requires modelling alongside "
9527                            + "the slice-13 window-function projection support", select));
9528        }
9529        // ORDER BY itself is lifted in slice 9; see buildOrderByColumnRefs.
9530        // Vendor- and clause-level guards are checked there so the rejection
9531        // message can mention the specific sub-clause.
9532    }
9533
9534    /**
9535     * Walk the FROM clause: each top-level {@link TJoin} contributes its
9536     * base table; each chained {@link TJoinItem} contributes one more base
9537     * table plus the column refs found in its ON-condition expression.
9538     * Comma-separated FROM lists (multiple top-level TJoins) and
9539     * single-source SELECTs both reduce to the same loop.
9540     */
9541    private static List<RelationSource> buildRelations(TSelectSqlStatement select,
9542                                                       NameBindingProvider provider,
9543                                                       List<ColumnRef> joinRefsOut,
9544                                                       boolean allowFromSubqueries) {
9545        return buildRelations(select, provider, joinRefsOut, allowFromSubqueries,
9546                /*allowJoinOnPredicateSubqueries=*/ false,
9547                /*stmtsForExtraction=*/ null,
9548                /*lineageForExtraction=*/ null,
9549                /*cteMapForExtraction=*/ null);
9550    }
9551
9552    /**
9553     * Slice-23/24 overload of {@link #buildRelations}. When
9554     * {@code allowJoinOnPredicateSubqueries} is {@code true} (outer-SELECT
9555     * call site only), uncorrelated EXISTS subqueries inside JOIN ON
9556     * predicates are extracted as their own {@code <predicate_subquery_<i>>}
9557     * StatementGraphs appended to {@code stmtsForExtraction}. The extracted
9558     * subtrees are then skipped by the JOIN-ON window-function rejecter and
9559     * the JOIN-ON ref collector so their inner refs do not leak into outer
9560     * {@code joinColumnRefs}.
9561     *
9562     * <p>Slice 24: {@code cteMapForExtraction} carries outer's
9563     * CTE-name-to-statement-index map so the extracted predicate body can
9564     * emit STATEMENT_OUTPUT → STATEMENT_OUTPUT edges into outer-visible CTE
9565     * bodies via {@link #emitLineageForStatement}. Non-outer call sites
9566     * (where {@code allowJoinOnPredicateSubqueries=false}) pass {@code null}.
9567     */
9568    private static List<RelationSource> buildRelations(TSelectSqlStatement select,
9569                                                       NameBindingProvider provider,
9570                                                       List<ColumnRef> joinRefsOut,
9571                                                       boolean allowFromSubqueries,
9572                                                       boolean allowJoinOnPredicateSubqueries,
9573                                                       List<StatementGraph> stmtsForExtraction,
9574                                                       List<LineageEdge> lineageForExtraction,
9575                                                       Map<String, Integer> cteMapForExtraction) {
9576        if (select.joins == null || select.joins.size() == 0) {
9577            throw new SemanticIRBuildException(Diagnostic.error(DiagnosticCode.SELECT_NO_FROM_SOURCE, "SELECT must have at least one FROM source", select));
9578        }
9579        // Slice 62: comma-separated FROM lists (e.g. `FROM a, b`)
9580        // parse as multiple top-level TJoin elements. We admit them at
9581        // outer / CTE-body / FROM-subquery-body call sites (where
9582        // {@code allowFromSubqueries=true}) and build them as an ordered
9583        // cross-product relation graph with empty {@code joinColumnRefs}
9584        // (WHERE-side predicates feed {@code filterColumnRefs} as
9585        // usual). Synthetic body contexts (scalar / set-op-branch /
9586        // set-op-CTE / predicate) call this method with
9587        // {@code allowFromSubqueries=false} and stay rejected — that
9588        // is exactly the discriminator we need. Predicate bodies also
9589        // hit an earlier shape-specific reject inside
9590        // {@link #preflightExistsInnerShape}.
9591        if (!allowFromSubqueries && select.joins.size() > 1) {
9592            throw new SemanticIRBuildException(
9593                    Diagnostic.error(DiagnosticCode.COMMA_FROM_IN_BODY_NOT_SUPPORTED,
9594                    "comma-separated FROM list (implicit cross join) is not supported "
9595                            + "inside scalar / set-op-branch / set-op-CTE / predicate body "
9596                            + "contexts yet; use explicit JOIN ... ON", select));
9597        }
9598        // Slice 63: explicit CROSS JOIN admits at outer / CTE-body /
9599        // FROM-subquery-body call sites (allowFromSubqueries=true) but
9600        // stays rejected inside synthetic body contexts (scalar /
9601        // set-op-branch / set-op-CTE / predicate) because the body's
9602        // shape contract (single column for scalar; column-count parity
9603        // for set-op branches; constant or single column-ref for
9604        // predicates) cannot host a cross-product relation graph
9605        // safely. Predicate bodies also hit an earlier shape-specific
9606        // reject inside {@link #preflightExistsInnerShape} so the
9607        // user-visible diagnostic mentions EXISTS / IN-SELECT context.
9608        if (!allowFromSubqueries) {
9609            for (TJoin join : select.joins) {
9610                TJoinItemList items = join.getJoinItems();
9611                if (items == null) continue;
9612                for (int i = 0; i < items.size(); i++) {
9613                    TJoinItem item = items.getJoinItem(i);
9614                    if (item == null) continue;
9615                    if (item.getJoinType() == EJoinType.cross) {
9616                        throw new SemanticIRBuildException(
9617                                Diagnostic.error(DiagnosticCode.CROSS_JOIN_IN_BODY_NOT_SUPPORTED,
9618                                "CROSS JOIN is not supported inside scalar / "
9619                                        + "set-op-branch / set-op-CTE / predicate "
9620                                        + "body contexts yet; rewrite as INNER "
9621                                        + "JOIN ... ON in the body", item));
9622                    }
9623                    // Slice 64: USING admitted at outer / CTE-body /
9624                    // FROM-subquery-body call sites but rejected inside
9625                    // synthetic body contexts. The body's shape contract
9626                    // (single column for scalar, column-count parity for
9627                    // set-op branches, constant/column-ref for predicate
9628                    // bodies) cannot host the merged-key semantics safely.
9629                    if (item.getUsingColumns() != null
9630                            && item.getUsingColumns().size() > 0) {
9631                        throw new SemanticIRBuildException(
9632                                Diagnostic.error(DiagnosticCode.USING_IN_BODY_NOT_SUPPORTED,
9633                                "JOIN ... USING (...) is not supported inside "
9634                                        + "scalar / set-op-branch / set-op-CTE / "
9635                                        + "predicate body contexts yet; rewrite "
9636                                        + "as JOIN ... ON in the body", item));
9637                    }
9638                    // Slice 66: NATURAL JOIN inside synthetic body
9639                    // contexts is rejected with a tuned diagnostic.
9640                    // Predicate bodies hit preflightExistsInnerShape
9641                    // first which emits an EXISTS-tuned message; this
9642                    // reject fires for scalar / set-op-branch /
9643                    // set-op-CTE bodies (and as defense-in-depth for
9644                    // predicate bodies if the preflight didn't catch
9645                    // a vendor-specific variant).
9646                    if (isNaturalJoinType(item.getJoinType())) {
9647                        throw new SemanticIRBuildException(
9648                                Diagnostic.error(DiagnosticCode.NATURAL_IN_BODY_NOT_SUPPORTED,
9649                                "NATURAL JOIN is not supported inside "
9650                                        + "scalar / set-op-branch / set-op-CTE / "
9651                                        + "predicate body contexts yet; rewrite "
9652                                        + "as JOIN ... ON in the body", item));
9653                    }
9654                }
9655            }
9656        }
9657        List<RelationSource> relations = new ArrayList<>();
9658        for (TJoin join : select.joins) {
9659            TTable leftTable = join.getTable();
9660            if (leftTable == null) {
9661                throw new SemanticIRBuildException(Diagnostic.error(DiagnosticCode.FROM_SOURCE_NO_TABLE, "FROM source has no table", join));
9662            }
9663            relations.add(buildRelation(leftTable, provider, allowFromSubqueries));
9664
9665            // Slice 66: per top-level TJoin LeftOutputState. Seeded
9666            // with the top-left table's catalog; updated as JoinItems
9667            // walk left-to-right. NATURAL JoinItems consume this state
9668            // (catalog intersection) and update it (merge right's
9669            // shared keys into existing slots, append non-shared right
9670            // columns). Reset between top-level TJoins so comma-FROM
9671            // groups stay independent.
9672            LeftOutputState leftState = new LeftOutputState();
9673            seedLeftOutput(leftState, leftTable, provider);
9674
9675            TJoinItemList items = join.getJoinItems();
9676            if (items == null) continue;
9677            for (int i = 0; i < items.size(); i++) {
9678                TJoinItem item = items.getJoinItem(i);
9679                rejectUnsupportedJoinShape(item);
9680                TTable rightTable = item.getTable();
9681                if (rightTable == null) {
9682                    throw new SemanticIRBuildException(Diagnostic.error(DiagnosticCode.JOIN_ITEM_NO_TABLE, "JOIN item has no table", item));
9683                }
9684                // Slice 17/18: subqueries on a JOIN side are extracted as
9685                // their own statements by extractFromSubqueriesAsStatements
9686                // before buildRelations runs (when allowFromSubqueries=true
9687                // — outer build path, non-set-op CTE body, AND nested
9688                // FROM-subquery body recursion).
9689                // Scalar-body / set-op-branch / set-op-CTE-body builds pass
9690                // allowFromSubqueries=false; buildRelation rejects there.
9691                relations.add(buildRelation(rightTable, provider, allowFromSubqueries));
9692                // Slice 66: NATURAL admission via catalog inference.
9693                // Computes shared keys against the running LeftOutputState
9694                // (which carries the accumulated row type so far) and
9695                // routes through the shared emitMergedJoinRefs helper.
9696                // Rejects with a side-specific diagnostic when catalog
9697                // is missing on either side.
9698                if (isNaturalJoinType(item.getJoinType())) {
9699                    NaturalKeyResult r = naturalSharedKeys(leftState, rightTable, provider);
9700                    if (r.kind != NaturalKeyResult.Kind.SUCCESS) {
9701                        throw new SemanticIRBuildException(
9702                                Diagnostic.error(DiagnosticCode.NATURAL_CATALOG_REQUIRED,
9703                                formatNaturalCatalogReject(r), item));
9704                    }
9705                    emitMergedJoinRefs(JoinKind.NATURAL, r.keys, join, items, i,
9706                            rightTable, provider, joinRefsOut);
9707                    mergeRightIntoLeftOutput(leftState, rightTable, provider, r.keys);
9708                    continue;
9709                }
9710                // Slice 64: USING and ON are mutually exclusive
9711                // (enforced by rejectUnsupportedJoinShape). Populate
9712                // per-key joinColumnRefs from USING here, then skip the
9713                // onCond branch since it cannot be both.
9714                TObjectNameList usingCols = item.getUsingColumns();
9715                if (usingCols != null && usingCols.size() > 0) {
9716                    populateUsingJoinRefs(join, items, i, rightTable,
9717                            usingCols, provider, joinRefsOut);
9718                    // Slice 66: USING JoinItems also merge right into
9719                    // the LeftOutputState so subsequent NATURAL JoinItems
9720                    // see the accumulated row type (including the merged
9721                    // USING keys at their original slots).
9722                    List<String> usingKeyNames = new ArrayList<>(usingCols.size());
9723                    for (int k = 0; k < usingCols.size(); k++) {
9724                        TObjectName usingKey = usingCols.getObjectName(k);
9725                        if (usingKey == null) continue;
9726                        String keyName = usingKey.getColumnNameOnly();
9727                        if (keyName != null && !keyName.isEmpty()) {
9728                            usingKeyNames.add(keyName);
9729                        }
9730                    }
9731                    mergeRightIntoLeftOutput(leftState, rightTable, provider, usingKeyNames);
9732                    continue;
9733                }
9734                // Slice 66: ON / CROSS JoinItem — append right's catalog
9735                // columns to the running LeftOutputState. NATURAL JoinItems
9736                // that follow will see the accumulated row type.
9737                appendRightToLeftOutput(leftState, rightTable, provider);
9738                TExpression onCond = item.getOnCondition();
9739                if (onCond != null) {
9740                    if (allowJoinOnPredicateSubqueries) {
9741                        // Slice 23/24/25: outer-SELECT JOIN ON path —
9742                        // extract uncorrelated predicate-subquery wrappers
9743                        // (EXISTS / NOT EXISTS / IN-SELECT / NOT IN-SELECT /
9744                        // scalar comparison subquery / ANY-ALL-SOME) as
9745                        // their own <predicate_subquery_<i>>
9746                        // StatementGraphs and skip their subtrees during
9747                        // the slice-13 window guard and the JOIN-ON ref
9748                        // collection. The remaining subquery-leak guard
9749                        // rejects everything that is NOT an extracted
9750                        // wrapper (correlated bodies, multi-column inner
9751                        // projection, expression LHS, tuple LHS, subquery
9752                        // on left side, etc.).
9753                        Set<TExpression> extractedRoots =
9754                                extractUncorrelatedPredicateSubqueriesFromJoinOn(onCond, provider,
9755                                        stmtsForExtraction, lineageForExtraction,
9756                                        cteMapForExtraction);
9757                        rejectAnyRemainingSubqueriesInJoinOn(onCond, extractedRoots);
9758                        rejectWindowFunctionInScopeSkipping(onCond,
9759                                "JOIN ON condition", extractedRoots);
9760                        joinRefsOut.addAll(
9761                                collectColumnRefsSkipping(onCond, provider, extractedRoots));
9762                    } else {
9763                        // Slice 13: reject window functions in JOIN ON before
9764                        // collectColumnRefs descends.
9765                        rejectWindowFunctionInScope(onCond, "JOIN ON condition");
9766                        // Slice 17: predicate subqueries inside JOIN ON would otherwise
9767                        // slip through collectColumnRefs and produce an incomplete IR.
9768                        // Hardens the boundary at every non-outer `buildRelations` call site
9769                        // (FROM-subquery body, CTE body, scalar body, set-op branch).
9770                        // The slice-23 outer-SELECT path replaces this rejection with
9771                        // selective EXISTS extraction (see the if branch above).
9772                        rejectSubqueriesInJoinOn(onCond);
9773                        joinRefsOut.addAll(collectColumnRefs(onCond, provider));
9774                    }
9775                }
9776            }
9777        }
9778        rejectDuplicateAliases(relations);
9779        return relations;
9780    }
9781
9782    /**
9783     * Slice 17: reject predicate subqueries (EXISTS, IN-SELECT,
9784     * scalar-subquery comparisons, etc.) inside a JOIN ON expression.
9785     * Without this guard, slice-17's expanded JOIN surface (relation
9786     * subqueries on either side) would let predicate subqueries slip
9787     * past {@code collectColumnRefs} and produce incomplete IR. Applies
9788     * to every {@code buildRelations} call site; the slice-11
9789     * {@link #rejectSubqueriesInScalarBodyClauses} and slice-17
9790     * {@link #rejectSubqueriesInFromSubqueryBodyClauses} fire BEFORE
9791     * the recursive {@code buildSelectStatement}, so their context-
9792     * specific messages preempt this one.
9793     */
9794    private static void rejectSubqueriesInJoinOn(TExpression onCond) {
9795        if (containsAnySubqueryExpression(onCond)) {
9796            throw new SemanticIRBuildException(
9797                    Diagnostic.error(DiagnosticCode.JOIN_ON_TOP_LEVEL_SUBQUERY_NOT_SUPPORTED,
9798                    "subquery in a top-level JOIN ON predicate is not supported yet", onCond));
9799        }
9800    }
9801
9802    // ====================================================================
9803    // Slice 23: uncorrelated EXISTS subqueries in top-level outer-SELECT
9804    // JOIN ON.
9805    //
9806    // Approach: walk the JOIN-ON expression looking for `exists_t` nodes
9807    // (and `not_t(exists_t(...))` for NOT EXISTS); validate each as
9808    // uncorrelated with a constant-only inner projection; build the inner
9809    // SELECT as its own `<predicate_subquery_<i>>` StatementGraph; record
9810    // the extracted `exists_t` root in a Set so the JOIN-ON window guard
9811    // and ref collector can skip its subtree. Predicate bodies are
9812    // unreachable from outer (no relation, no lineage edge) so they
9813    // contribute zero canonical edges — matching dlineage's behaviour for
9814    // EXISTS-in-JOIN-ON shapes that project a constant (the inner-shape
9815    // preflight enforces constant-only projection so this invariant
9816    // holds).
9817    //
9818    // Process: codex round 1 + round 2 plan reviews; v3 plan locked.
9819    // See roadmap §14.25 (slice-23 entry).
9820    // ====================================================================
9821
9822    /**
9823     * True iff {@code e} is the root of an EXISTS predicate that slice 23
9824     * may extract: either an {@code exists_t} expression, or a
9825     * {@code logical_not_t} whose <b>right</b> operand is {@code exists_t}.
9826     * NOT EXISTS unwraps to its inner {@code exists_t}.
9827     *
9828     * <p>Note: the GSP parser puts the operand of {@code logical_not_t} in
9829     * {@link TExpression#getRightOperand()}, not {@code getLeftOperand()}
9830     * (verified across Oracle / PostgreSQL / MSSQL / MySQL / BigQuery).
9831     * The root fast-path for {@code NOT EXISTS} is therefore "dead" in the
9832     * sense that the descendant walker on the wrapping {@code logical_not_t}
9833     * already visits the child {@code exists_t} — we still keep it here so
9834     * the symmetry between root EXISTS and root NOT EXISTS is explicit.
9835     *
9836     * <p>Slice 25 (kept as a dedicated helper for slice-23/24 callers and
9837     * for clarity): the slice-25 generalisation lives in
9838     * {@link #unwrapToInnerExtractableSubquery(TExpression)} which
9839     * recognises four wrapper shapes — including the two EXISTS shapes
9840     * here.
9841     */
9842    private static boolean isExistsRoot(TExpression e) {
9843        if (e == null) return false;
9844        if (e.getExpressionType() == EExpressionType.exists_t) return true;
9845        if (e.getExpressionType() == EExpressionType.logical_not_t
9846                && e.getRightOperand() != null
9847                && e.getRightOperand().getExpressionType() == EExpressionType.exists_t) {
9848            return true;
9849        }
9850        return false;
9851    }
9852
9853    /** Return the actual {@code exists_t} node — unwrap a {@code logical_not_t} parent if present. */
9854    private static TExpression unwrapExistsRoot(TExpression e) {
9855        if (e.getExpressionType() == EExpressionType.exists_t) return e;
9856        return e.getRightOperand();
9857    }
9858
9859    /**
9860     * Slice 25 / Slice 26: pure shape-recogniser for the predicate-
9861     * subquery wrappers admitted in TOP-LEVEL JOIN ON. Returns the inner
9862     * extractable node ({@code subquery_t} or {@code exists_t}) for the
9863     * wrapper shapes; null otherwise. Pure — performs NO validation and
9864     * throws NO exceptions.
9865     *
9866     * <p>Recognised wrappers:
9867     * <ul>
9868     *   <li>{@code exists_t} (slice-23 EXISTS) — returns {@code e}.</li>
9869     *   <li>{@code logical_not_t} with rightOperand {@code exists_t}
9870     *       (slice-23 NOT EXISTS) — returns the inner exists_t.</li>
9871     *   <li>{@code in_t} with rightOperand {@code subquery_t}
9872     *       (slice 25 IN-SELECT / NOT IN-SELECT) — returns the
9873     *       rightOperand. LHS-subquery {@code in_t} returns null
9874     *       (slice 26 boundary: dlineage's {@code fdr clause="on"}
9875     *       sources omit the outer column for IN-LHS, so admitting on
9876     *       the IR side would manufacture canonical-model divergence).</li>
9877     *   <li>{@code simple_comparison_t} (slice 25 + slice 26 scalar
9878     *       comparison) — returns the operand on whichever side is a
9879     *       {@code subquery_t}. RHS-subquery (slice 25) and LHS-
9880     *       subquery (slice 26) are both admitted; both-sides subquery
9881     *       returns null and falls through to {@link #findSubqueryOnLeftWrapper}'s
9882     *       new "both subqueries" rejection branch.</li>
9883     *   <li>{@code group_comparison_t} with rightOperand
9884     *       {@code subquery_t} AND non-null {@code getQuantifier()}
9885     *       (slice 25 ANY/ALL/SOME) — returns the rightOperand.
9886     *       LHS-subquery {@code group_comparison_t} returns null
9887     *       (slice 26 boundary: borderline grammar; not probed).</li>
9888     * </ul>
9889     *
9890     * <p>For null returns, the wrapper either is not a recognised shape
9891     * (falls through to the slice-23 generic remaining-subquery rejection
9892     * in {@link #rejectAnyRemainingSubqueriesInJoinOn}) OR has the right
9893     * outer shape but the LHS / RHS positioning is unsupported (subquery
9894     * on left side of IN/quantifier, both sides subquery for cmp, tuple
9895     * LHS / RHS, expression LHS / RHS). The walker validates the
9896     * non-subquery side via {@link #isAdmittedOuterLhsShape} or
9897     * {@link #isAdmittedOuterRhsShape} and throws a slice-25 / slice-26
9898     * tuned message before calling this helper for extraction.
9899     *
9900     * <p>The slice-23/24 EXISTS callers ({@code isExistsRoot} and
9901     * {@code unwrapExistsRoot}) remain in place — both are simple
9902     * boolean / unwrap helpers; this method consolidates the slice-25 /
9903     * slice-26 shape decision in one place.
9904     */
9905    private static TExpression unwrapToInnerExtractableSubquery(TExpression e) {
9906        if (e == null) return null;
9907        EExpressionType t = e.getExpressionType();
9908        if (t == EExpressionType.exists_t) return e;
9909        if (t == EExpressionType.logical_not_t
9910                && e.getRightOperand() != null
9911                && e.getRightOperand().getExpressionType() == EExpressionType.exists_t) {
9912            return e.getRightOperand();
9913        }
9914        TExpression l = e.getLeftOperand();
9915        TExpression r = e.getRightOperand();
9916        boolean lhsIsSubq = l != null && l.getExpressionType() == EExpressionType.subquery_t;
9917        boolean rhsIsSubq = r != null && r.getExpressionType() == EExpressionType.subquery_t;
9918        if (t == EExpressionType.in_t) {
9919            return rhsIsSubq ? r : null;
9920        }
9921        if (t == EExpressionType.simple_comparison_t) {
9922            // Slice 26: admit subquery on either single side. Both sides
9923            // → null (rejected via findSubqueryOnLeftWrapper's new
9924            // "both subqueries" branch — see isSubqueryOnLeftOfWrapper).
9925            if (lhsIsSubq && rhsIsSubq) return null;
9926            if (rhsIsSubq) return r;
9927            if (lhsIsSubq) return l;
9928            return null;
9929        }
9930        if (t == EExpressionType.group_comparison_t
9931                && e.getQuantifier() != null) {
9932            return rhsIsSubq ? r : null;
9933        }
9934        return null;
9935    }
9936
9937    /**
9938     * Slice 25: admitted LHS shapes for non-EXISTS predicate-subquery
9939     * wrappers ({@code in_t} / {@code simple_comparison_t} /
9940     * {@code group_comparison_t}) when the subquery is on the RHS.
9941     *
9942     * <p>Admits ONLY {@link EExpressionType#simple_object_name_t} —
9943     * a single column reference, qualified or unqualified. Rejects:
9944     * tuple expressions ({@code (a, b) IN (...)}), parenthesized
9945     * wrapping ({@code (e.col) IN (...)}), arithmetic
9946     * ({@code e.col + 1 IN (...)}), function calls
9947     * ({@code UPPER(e.col) IN (...)}), scalar subqueries on LHS, and
9948     * any other non-column shape.
9949     *
9950     * <p>Slice-25 boundary; future slice may admit parenthesized
9951     * column refs (slice 26+).
9952     */
9953    private static boolean isAdmittedOuterLhsShape(TExpression lhs) {
9954        return lhs != null
9955                && lhs.getExpressionType() == EExpressionType.simple_object_name_t;
9956    }
9957
9958    /**
9959     * Slice 26: admitted RHS shapes for {@code simple_comparison_t}
9960     * with subquery on the LHS. Mirror of
9961     * {@link #isAdmittedOuterLhsShape}: admits ONLY
9962     * {@link EExpressionType#simple_object_name_t} — a single column
9963     * reference, qualified or unqualified. Rejects tuple, parenthesized,
9964     * arithmetic, function-call, and subquery (the "both subqueries"
9965     * shape is rejected separately via
9966     * {@link #isSubqueryOnLeftOfWrapper}'s new
9967     * {@code simple_comparison_t} both-sides branch).
9968     *
9969     * <p>Slice-26 boundary: only {@code simple_comparison_t} reaches
9970     * this helper (the walker dispatches on which side is the
9971     * subquery). {@code in_t} / {@code group_comparison_t} with LHS
9972     * subquery return null from
9973     * {@link #unwrapToInnerExtractableSubquery} so they never reach
9974     * here.
9975     */
9976    private static boolean isAdmittedOuterRhsShape(TExpression rhs) {
9977        return rhs != null
9978                && rhs.getExpressionType() == EExpressionType.simple_object_name_t;
9979    }
9980
9981    /**
9982     * Slice 25 (impl-review M1-fix): true iff {@code e} is a
9983     * {@code logical_not_t} wrapping a slice-25 IN / scalar-cmp /
9984     * ANY-ALL-SOME wrapper (i.e. NOT applied to an admitted slice-25
9985     * shape that ISN'T an EXISTS). The descendant walker would
9986     * otherwise traverse INTO this {@code logical_not_t} and find the
9987     * child wrapper, accidentally admitting
9988     * {@code NOT (e.col IN (SELECT ...))} which is NOT a slice-25
9989     * recognised shape ({@code unwrapToInnerExtractableSubquery}
9990     * matches {@code logical_not_t} only when the inner is
9991     * {@code exists_t}).
9992     *
9993     * <p>This helper is consulted by the extraction walker BEFORE it
9994     * descends into the children of a {@code logical_not_t}, so the
9995     * rejection happens at the wrapper level with a tuned message
9996     * pointing at the slice-25 boundary.
9997     */
9998    private static boolean isLogicalNotOverNonExistsWrapper(TExpression e) {
9999        if (e == null) return false;
10000        if (e.getExpressionType() != EExpressionType.logical_not_t) return false;
10001        TExpression r = e.getRightOperand();
10002        if (r == null) return false;
10003        // Strip parenthesis_t chain. The Oracle parser wraps
10004        // `NOT (e.col IN (SELECT...))` as
10005        // logical_not_t → parenthesis_t → in_t, so the immediate
10006        // right child is parenthesis_t. Descend through any chain of
10007        // parens to find the actual subject. Note:
10008        // {@code parenthesis_t} stores its child on
10009        // {@link TExpression#getLeftOperand()} (mirroring
10010        // {@link #isConstantExpression}'s descent).
10011        TExpression subject = r;
10012        while (subject != null
10013                && subject.getExpressionType() == EExpressionType.parenthesis_t) {
10014            subject = subject.getLeftOperand();
10015        }
10016        if (subject == null) return false;
10017        if (subject.getExpressionType() == EExpressionType.exists_t) return false;
10018        // Either an in_t / simple_comparison_t / group_comparison_t
10019        // with subquery RHS, or any of those types directly.
10020        return unwrapToInnerExtractableSubquery(subject) != null;
10021    }
10022
10023    /**
10024     * Slice 25 / Slice 26: build a tuned outer-shape rejection message
10025     * for a non-EXISTS predicate-subquery wrapper. Called from the
10026     * extraction walker when {@link #unwrapToInnerExtractableSubquery}
10027     * returns non-null for an in_t / simple_comparison_t /
10028     * group_comparison_t but the non-subquery side is not admitted by
10029     * {@link #isAdmittedOuterLhsShape} (slice 25 — subquery on RHS) or
10030     * {@link #isAdmittedOuterRhsShape} (slice 26 — subquery on LHS).
10031     *
10032     * <p>{@code isLhsSubquery} indicates which side of the wrapper
10033     * carries the subquery: {@code true} = subquery on LHS (slice 26
10034     * path; we validate the wrapper's RHS), {@code false} = subquery
10035     * on RHS (slice 25 path; we validate the wrapper's LHS).
10036     *
10037     * <p>Uses the slice-25 outer-shape error prefix
10038     * "predicate subquery in JOIN ON:" so end users distinguish
10039     * outer-shape failures from the slice-23/24 inner-shape failures
10040     * (which keep the "EXISTS in JOIN ON:" prefix).
10041     */
10042    private static String buildOuterShapeRejectionMessage(TExpression wrapper,
10043                                                          boolean isLhsSubquery,
10044                                                          PredicateClauseContext ctx) {
10045        EExpressionType t = wrapper.getExpressionType();
10046        String shapeLabel;
10047        if (t == EExpressionType.in_t) shapeLabel = "IN";
10048        else if (t == EExpressionType.simple_comparison_t) shapeLabel = "comparison";
10049        else if (t == EExpressionType.group_comparison_t) shapeLabel = "ANY/ALL/SOME";
10050        else shapeLabel = String.valueOf(t);
10051        // Validate the side that does NOT carry the subquery.
10052        TExpression nonSubquerySide = isLhsSubquery
10053                ? wrapper.getRightOperand()
10054                : wrapper.getLeftOperand();
10055        String sideLabel = isLhsSubquery ? "RHS" : "LHS";
10056        EExpressionType sideType = nonSubquerySide == null
10057                ? null : nonSubquerySide.getExpressionType();
10058        String detail;
10059        if (nonSubquerySide == null) {
10060            detail = "missing " + sideLabel;
10061        } else if (sideType == EExpressionType.list_t) {
10062            detail = "tuple " + sideLabel;
10063        } else if (sideType == EExpressionType.parenthesis_t) {
10064            detail = "parenthesized " + sideLabel;
10065        } else if (sideType == EExpressionType.simple_object_name_t) {
10066            // Defensive: should not be reached when the corresponding
10067            // admitted-shape helper returns true.
10068            detail = "unexpected admitted " + sideLabel + " shape";
10069        } else {
10070            detail = "expression " + sideLabel + " (" + sideType + ")";
10071        }
10072        String boundary = isLhsSubquery ? "slice 26 boundary" : "slice 25 boundary";
10073        return "predicate subquery in " + ctx.clauseLabel + ": " + shapeLabel
10074                + " wrapper has unsupported " + sideLabel + " shape ("
10075                + detail + "); only a single column reference "
10076                + "(simple_object_name_t) is admitted on the "
10077                + sideLabel
10078                + " of a comparison / IN / ANY-ALL-SOME "
10079                + "predicate subquery when the other side is a "
10080                + "subquery (" + boundary + ")";
10081    }
10082
10083    /**
10084     * Slice 25 (rename of slice-23
10085     * {@code extractUncorrelatedExistsFromJoinOn}): walk the JOIN-ON
10086     * expression, extract every uncorrelated predicate-subquery wrapper
10087     * (EXISTS / NOT EXISTS / IN-SELECT / NOT IN-SELECT / scalar
10088     * comparison subquery / ANY-ALL-SOME) as its own
10089     * {@code <predicate_subquery_<i>>} StatementGraph, and return the
10090     * set of extracted inner nodes (the {@code exists_t} or
10091     * {@code subquery_t}, NOT the wrapping {@code in_t} /
10092     * {@code simple_comparison_t} / {@code group_comparison_t} /
10093     * {@code logical_not_t}) keyed on identity.
10094     *
10095     * <p>The set is consumed by the JOIN-ON window-function guard, the
10096     * JOIN-ON ref collector, and the slice-17 remaining-subquery
10097     * rejecter — each of those skips INTO / PAST these subtrees so
10098     * inner refs do not leak into outer joinColumnRefs. Critically,
10099     * the wrapper itself (e.g. an {@code in_t} whose RHS is the
10100     * {@code subquery_t}) is NOT in the set — this lets the LHS column
10101     * reference (e.g. {@code e.dept_id} in
10102     * {@code e.dept_id IN (SELECT ...)}) be collected normally into
10103     * outer's {@code joinColumnRefs}.
10104     *
10105     * <p>The walker handles BOTH the root-position case (the entire ON
10106     * IS one of the four wrappers, which {@code acceptChildren} would
10107     * not visit as a node) AND descendant positions (e.g.
10108     * {@code e.id = d.id AND e.dept_id IN (SELECT ...)}). Multiple
10109     * wrappers in one ON, multiple ON across multiple JOINs, and mixed
10110     * EXISTS / IN / cmp / ANY-ALL combinations are all handled.
10111     *
10112     * <p>Slice 25 / Slice 26 outer-shape validation: for non-EXISTS
10113     * wrappers, the side opposite the subquery must be a single
10114     * {@code simple_object_name_t} column ref. Slice 25 admits subquery
10115     * on RHS only and validates LHS via
10116     * {@link #isAdmittedOuterLhsShape}. Slice 26 lifts {@code
10117     * simple_comparison_t} to also admit subquery on LHS and validates
10118     * RHS via {@link #isAdmittedOuterRhsShape}. Tuple / parenthesized /
10119     * expression / function-call shapes on the validated side throw
10120     * {@link SemanticIRBuildException} with a tuned message via
10121     * {@link #buildOuterShapeRejectionMessage}. The EXISTS branch has
10122     * no outer-shape gate (slice-23 carryover).
10123     *
10124     * <p>Snapshot/rollback wrapper at the outer-SELECT call site
10125     * ({@link #build}) catches a partial extraction (e.g. third
10126     * wrapper rejected after first two extracted) and truncates
10127     * {@code stmts}/{@code lineage} back to the snapshot.
10128     */
10129    /**
10130     * Slice 110 — context bag threading clause-specific
10131     * {@link DiagnosticCode}s and a clause-label into the slice-23+
10132     * predicate-subquery extraction pipeline so the same walker code can
10133     * power JOIN-ON (slice 23–33+) and UPDATE WHERE (slice 110) without
10134     * code duplication.
10135     *
10136     * <p>Two static instances exist:
10137     * <ul>
10138     *   <li>{@link #JOIN_ON} — preserves slice-23+ JOIN-ON behavior
10139     *       byte-for-byte (same codes, same "JOIN ON" labels).</li>
10140     *   <li>{@link #UPDATE_WHERE} — slice 110 UPDATE WHERE call site
10141     *       (parallel {@code UPDATE_WHERE_*} codes; "UPDATE WHERE clause"
10142     *       label).</li>
10143     * </ul>
10144     *
10145     * <p>Codes per clause are intentionally parallel (slice-80
10146     * granular-codes contract: each semantic reject reason gets its own
10147     * stable API code rather than an umbrella code with discriminating
10148     * message text).
10149     */
10150    private static final class PredicateClauseContext {
10151        /** Used as the "in &lt;label&gt;" piece of every diagnostic message. */
10152        final String clauseLabel;
10153        final DiagnosticCode existsBodyMissing;
10154        final DiagnosticCode existsInnerRelationUnknown;
10155        final DiagnosticCode existsCorrelatedUnknownOuterAlias;
10156        final DiagnosticCode predicateNotNot;
10157        final DiagnosticCode outerShapeRejected;
10158        final DiagnosticCode scalarComparisonBothSides;
10159        final DiagnosticCode predicateSubqueryOnLeft;
10160        final DiagnosticCode genericSubqueryNotSupported;
10161
10162        private PredicateClauseContext(String clauseLabel,
10163                                       DiagnosticCode existsBodyMissing,
10164                                       DiagnosticCode existsInnerRelationUnknown,
10165                                       DiagnosticCode existsCorrelatedUnknownOuterAlias,
10166                                       DiagnosticCode predicateNotNot,
10167                                       DiagnosticCode outerShapeRejected,
10168                                       DiagnosticCode scalarComparisonBothSides,
10169                                       DiagnosticCode predicateSubqueryOnLeft,
10170                                       DiagnosticCode genericSubqueryNotSupported) {
10171            this.clauseLabel = clauseLabel;
10172            this.existsBodyMissing = existsBodyMissing;
10173            this.existsInnerRelationUnknown = existsInnerRelationUnknown;
10174            this.existsCorrelatedUnknownOuterAlias = existsCorrelatedUnknownOuterAlias;
10175            this.predicateNotNot = predicateNotNot;
10176            this.outerShapeRejected = outerShapeRejected;
10177            this.scalarComparisonBothSides = scalarComparisonBothSides;
10178            this.predicateSubqueryOnLeft = predicateSubqueryOnLeft;
10179            this.genericSubqueryNotSupported = genericSubqueryNotSupported;
10180        }
10181
10182        static final PredicateClauseContext JOIN_ON = new PredicateClauseContext(
10183                "JOIN ON",
10184                DiagnosticCode.JOIN_ON_EXISTS_BODY_MISSING,
10185                DiagnosticCode.JOIN_ON_EXISTS_INNER_RELATION_UNKNOWN,
10186                DiagnosticCode.JOIN_ON_EXISTS_CORRELATED_UNKNOWN_OUTER_ALIAS,
10187                DiagnosticCode.JOIN_ON_PREDICATE_NOT_NOT_SUPPORTED,
10188                DiagnosticCode.JOIN_ON_OUTER_SHAPE_REJECTED,
10189                DiagnosticCode.JOIN_ON_PREDICATE_SCALAR_COMPARISON_NOT_LIFTABLE,
10190                DiagnosticCode.JOIN_ON_PREDICATE_NOT_LIFTABLE,
10191                DiagnosticCode.JOIN_ON_PREDICATE_GENERIC_NOT_SUPPORTED);
10192
10193        static final PredicateClauseContext UPDATE_WHERE = new PredicateClauseContext(
10194                "UPDATE WHERE clause",
10195                DiagnosticCode.UPDATE_WHERE_EXISTS_BODY_MISSING,
10196                DiagnosticCode.UPDATE_WHERE_EXISTS_INNER_RELATION_UNKNOWN,
10197                DiagnosticCode.UPDATE_WHERE_EXISTS_CORRELATED_UNKNOWN_OUTER_ALIAS,
10198                DiagnosticCode.UPDATE_WHERE_PREDICATE_NOT_NOT_SUPPORTED,
10199                DiagnosticCode.UPDATE_WHERE_OUTER_SHAPE_REJECTED,
10200                DiagnosticCode.UPDATE_WHERE_PREDICATE_SCALAR_COMPARISON_NOT_LIFTABLE,
10201                DiagnosticCode.UPDATE_WHERE_PREDICATE_NOT_LIFTABLE,
10202                DiagnosticCode.UPDATE_WHERE_PREDICATE_GENERIC_NOT_SUPPORTED);
10203
10204        static final PredicateClauseContext DELETE_WHERE = new PredicateClauseContext(
10205                "DELETE WHERE clause",
10206                DiagnosticCode.DELETE_WHERE_EXISTS_BODY_MISSING,
10207                DiagnosticCode.DELETE_WHERE_EXISTS_INNER_RELATION_UNKNOWN,
10208                DiagnosticCode.DELETE_WHERE_EXISTS_CORRELATED_UNKNOWN_OUTER_ALIAS,
10209                DiagnosticCode.DELETE_WHERE_PREDICATE_NOT_NOT_SUPPORTED,
10210                DiagnosticCode.DELETE_WHERE_OUTER_SHAPE_REJECTED,
10211                DiagnosticCode.DELETE_WHERE_PREDICATE_SCALAR_COMPARISON_NOT_LIFTABLE,
10212                DiagnosticCode.DELETE_WHERE_PREDICATE_NOT_LIFTABLE,
10213                DiagnosticCode.DELETE_WHERE_PREDICATE_GENERIC_NOT_SUPPORTED);
10214
10215        static final PredicateClauseContext SELECT_WHERE = new PredicateClauseContext(
10216                "SELECT WHERE clause",
10217                DiagnosticCode.SELECT_WHERE_EXISTS_BODY_MISSING,
10218                DiagnosticCode.SELECT_WHERE_EXISTS_INNER_RELATION_UNKNOWN,
10219                DiagnosticCode.SELECT_WHERE_EXISTS_CORRELATED_UNKNOWN_OUTER_ALIAS,
10220                DiagnosticCode.SELECT_WHERE_PREDICATE_NOT_NOT_SUPPORTED,
10221                DiagnosticCode.SELECT_WHERE_OUTER_SHAPE_REJECTED,
10222                DiagnosticCode.SELECT_WHERE_PREDICATE_SCALAR_COMPARISON_NOT_LIFTABLE,
10223                DiagnosticCode.SELECT_WHERE_PREDICATE_NOT_LIFTABLE,
10224                DiagnosticCode.SELECT_WHERE_PREDICATE_GENERIC_NOT_SUPPORTED);
10225
10226        /**
10227         * Slice 113 — uncorrelated WHERE-side predicate subqueries on
10228         * set-op branches (UNION / INTERSECT / EXCEPT / MINUS branches).
10229         * Reuses every {@link DiagnosticCode} from {@link #SELECT_WHERE}
10230         * because a branch IS a SELECT — the shape rejects are
10231         * semantically identical to top-level SELECT WHERE. Only the
10232         * {@code clauseLabel} differs so diagnostic messages distinguish
10233         * the nested context (helpful when a multi-branch query reports
10234         * a reject and the user needs to know which branch). Keeping the
10235         * codes shared frees consumers from a new code-family migration
10236         * and preserves the enum count at 279.
10237         */
10238        static final PredicateClauseContext SET_OP_BRANCH_WHERE = new PredicateClauseContext(
10239                "set-op branch WHERE clause",
10240                DiagnosticCode.SELECT_WHERE_EXISTS_BODY_MISSING,
10241                DiagnosticCode.SELECT_WHERE_EXISTS_INNER_RELATION_UNKNOWN,
10242                DiagnosticCode.SELECT_WHERE_EXISTS_CORRELATED_UNKNOWN_OUTER_ALIAS,
10243                DiagnosticCode.SELECT_WHERE_PREDICATE_NOT_NOT_SUPPORTED,
10244                DiagnosticCode.SELECT_WHERE_OUTER_SHAPE_REJECTED,
10245                DiagnosticCode.SELECT_WHERE_PREDICATE_SCALAR_COMPARISON_NOT_LIFTABLE,
10246                DiagnosticCode.SELECT_WHERE_PREDICATE_NOT_LIFTABLE,
10247                DiagnosticCode.SELECT_WHERE_PREDICATE_GENERIC_NOT_SUPPORTED);
10248
10249        /**
10250         * Slice 114 — uncorrelated WHERE-side predicate subqueries
10251         * inside a non-set-op CTE body (the SELECT body of a single CTE
10252         * in a WITH list on SELECT / MERGE / UPDATE / DELETE). Reuses
10253         * every {@link DiagnosticCode} from {@link #SELECT_WHERE}
10254         * because a CTE body IS a SELECT — the shape rejects are
10255         * semantically identical to top-level SELECT WHERE. Only the
10256         * {@code clauseLabel} differs so a reject in a
10257         * {@code WITH cte AS (SELECT ... WHERE NOT (...))} shape can
10258         * identify the CTE-body host context in the diagnostic message.
10259         * Keeping the codes shared (slice 113 precedent) preserves the
10260         * enum count at 279 and frees consumers from another
10261         * code-family migration.
10262         */
10263        static final PredicateClauseContext CTE_BODY_WHERE = new PredicateClauseContext(
10264                "CTE body WHERE clause",
10265                DiagnosticCode.SELECT_WHERE_EXISTS_BODY_MISSING,
10266                DiagnosticCode.SELECT_WHERE_EXISTS_INNER_RELATION_UNKNOWN,
10267                DiagnosticCode.SELECT_WHERE_EXISTS_CORRELATED_UNKNOWN_OUTER_ALIAS,
10268                DiagnosticCode.SELECT_WHERE_PREDICATE_NOT_NOT_SUPPORTED,
10269                DiagnosticCode.SELECT_WHERE_OUTER_SHAPE_REJECTED,
10270                DiagnosticCode.SELECT_WHERE_PREDICATE_SCALAR_COMPARISON_NOT_LIFTABLE,
10271                DiagnosticCode.SELECT_WHERE_PREDICATE_NOT_LIFTABLE,
10272                DiagnosticCode.SELECT_WHERE_PREDICATE_GENERIC_NOT_SUPPORTED);
10273
10274        /**
10275         * Slice 120 — uncorrelated WHERE-side predicate subqueries inside
10276         * a FROM-subquery body (the inner SELECT of a {@code FROM (...)}
10277         * derived table). Reuses every {@link DiagnosticCode} from
10278         * {@link #SELECT_WHERE} (slice 113/114/116 precedent) because a
10279         * FROM-subquery body IS a SELECT — the shape rejects are
10280         * semantically identical to top-level SELECT WHERE. Only the
10281         * {@code clauseLabel} differs so a reject inside a
10282         * {@code FROM (SELECT ... WHERE NOT (...)) sub} shape can identify
10283         * the FROM-subquery host context in the diagnostic message. The
10284         * FROM-subquery body builder {@code processDirectSubqueryTable} is
10285         * shared by the SELECT, UPDATE (slice 83), and DELETE (slice 84)
10286         * FROM-subquery extractors, so this single context lifts all three.
10287         * Keeping the codes shared preserves the enum count at 279 and
10288         * frees consumers from another code-family migration.
10289         */
10290        static final PredicateClauseContext FROM_SUBQUERY_BODY_WHERE = new PredicateClauseContext(
10291                "FROM-subquery body WHERE clause",
10292                DiagnosticCode.SELECT_WHERE_EXISTS_BODY_MISSING,
10293                DiagnosticCode.SELECT_WHERE_EXISTS_INNER_RELATION_UNKNOWN,
10294                DiagnosticCode.SELECT_WHERE_EXISTS_CORRELATED_UNKNOWN_OUTER_ALIAS,
10295                DiagnosticCode.SELECT_WHERE_PREDICATE_NOT_NOT_SUPPORTED,
10296                DiagnosticCode.SELECT_WHERE_OUTER_SHAPE_REJECTED,
10297                DiagnosticCode.SELECT_WHERE_PREDICATE_SCALAR_COMPARISON_NOT_LIFTABLE,
10298                DiagnosticCode.SELECT_WHERE_PREDICATE_NOT_LIFTABLE,
10299                DiagnosticCode.SELECT_WHERE_PREDICATE_GENERIC_NOT_SUPPORTED);
10300
10301        /**
10302         * Slice 116 — uncorrelated WHERE-side predicate subqueries on
10303         * MERGE per-WHEN action WHEREs ({@code TMergeUpdateClause.updateWhereClause},
10304         * {@code TMergeUpdateClause.deleteWhereClause},
10305         * {@code TMergeInsertClause.insertWhereClause}). Reuses every
10306         * {@link DiagnosticCode} from {@link #SELECT_WHERE} (slice 113/114
10307         * precedent) because a MERGE-action WHERE predicate IS a SELECT
10308         * WHERE in shape — the shape rejects are semantically identical to
10309         * top-level SELECT WHERE. Only the {@code clauseLabel} differs so a
10310         * reject inside a MERGE WHEN can identify the host context in the
10311         * diagnostic message. Keeping the codes shared preserves the enum
10312         * count at 279 and frees consumers from another code-family
10313         * migration.
10314         */
10315        static final PredicateClauseContext MERGE_WHEN_WHERE = new PredicateClauseContext(
10316                "MERGE WHEN action WHERE clause",
10317                DiagnosticCode.SELECT_WHERE_EXISTS_BODY_MISSING,
10318                DiagnosticCode.SELECT_WHERE_EXISTS_INNER_RELATION_UNKNOWN,
10319                DiagnosticCode.SELECT_WHERE_EXISTS_CORRELATED_UNKNOWN_OUTER_ALIAS,
10320                DiagnosticCode.SELECT_WHERE_PREDICATE_NOT_NOT_SUPPORTED,
10321                DiagnosticCode.SELECT_WHERE_OUTER_SHAPE_REJECTED,
10322                DiagnosticCode.SELECT_WHERE_PREDICATE_SCALAR_COMPARISON_NOT_LIFTABLE,
10323                DiagnosticCode.SELECT_WHERE_PREDICATE_NOT_LIFTABLE,
10324                DiagnosticCode.SELECT_WHERE_PREDICATE_GENERIC_NOT_SUPPORTED);
10325    }
10326
10327    /**
10328     * Slice 110 — preserved entry-point alias for the JOIN-ON walker.
10329     * Delegates to {@link #extractUncorrelatedPredicateSubqueriesFromClause}
10330     * with {@link PredicateClauseContext#JOIN_ON} so existing JOIN-ON
10331     * callers (single site in {@code buildRelations}) need no change and
10332     * the slice-23+ diagnostic byte-shape is preserved exactly.
10333     */
10334    private static Set<TExpression> extractUncorrelatedPredicateSubqueriesFromJoinOn(
10335            TExpression onCond,
10336            final NameBindingProvider provider,
10337            final List<StatementGraph> stmts,
10338            final List<LineageEdge> lineage,
10339            final Map<String, Integer> cteMapForExtraction) {
10340        return extractUncorrelatedPredicateSubqueriesFromClause(
10341                onCond, provider, stmts, lineage, cteMapForExtraction,
10342                PredicateClauseContext.JOIN_ON,
10343                /*correlationScope=*/ null);
10344    }
10345
10346    /**
10347     * Slice 118 — overload preserved for the slice-110 / 111 / 112 / 113 /
10348     * 114 / 116 call sites that don't admit correlation. Delegates to the
10349     * 8-arg form with {@code correlationScope=null}.
10350     */
10351    private static Set<TExpression> extractUncorrelatedPredicateSubqueriesFromClause(
10352            TExpression onCond,
10353            final NameBindingProvider provider,
10354            final List<StatementGraph> stmts,
10355            final List<LineageEdge> lineage,
10356            final Map<String, Integer> cteMapForExtraction,
10357            final PredicateClauseContext ctx) {
10358        return extractUncorrelatedPredicateSubqueriesFromClause(onCond,
10359                provider, stmts, lineage, cteMapForExtraction, ctx,
10360                /*correlationScope=*/ null);
10361    }
10362
10363    /**
     * Slice 118 — same as the 6-arg overload but threads an optional
10365     * {@code correlationScope} (target + USING source + outer CTEs) into
10366     * {@link #extractOnePredicateSubqueryBody}. When non-null, the inner
10367     * predicate-body build uses tolerant outer binding and the post-build
10368     * correlation walk PROMOTES outer-aliased refs into synthesised
10369     * OUTER_REFERENCE relations instead of rejecting them. The FILTER and
10370     * WITHIN GROUP correlation walks remain active (codex round-1 Q2
10371     * BLOCKING fix) so refs hidden inside FILTER subtrees or PG
10372     * {@code fn.withinGroup.orderBy} continue to reject.
10373     *
10374     * <p>All non-MERGE callers pass {@code correlationScope=null} and
10375     * therefore see byte-identical behaviour. Only
10376     * {@link #collectMergeActionWhere} passes a non-null scope (built once
10377     * per MERGE in {@code buildMerge} via
10378     * {@link #buildMergeEnclosingScope}).
10379     */
10380    private static Set<TExpression> extractUncorrelatedPredicateSubqueriesFromClause(
10381            TExpression onCond,
10382            final NameBindingProvider provider,
10383            final List<StatementGraph> stmts,
10384            final List<LineageEdge> lineage,
10385            final Map<String, Integer> cteMapForExtraction,
10386            final PredicateClauseContext ctx,
10387            final EnclosingScope correlationScope) {
10388        // Defensive null assertions — the extraction path can only be
10389        // reached from buildRelations when
10390        // allowJoinOnPredicateSubqueries=true (outer-SELECT only), which
10391        // guarantees stmts/lineage/cteMap are non-null. Failing here means
10392        // a future refactor wired a non-outer call site through the slice-25
10393        // path without supplying the required state.
10394        if (onCond == null) {
10395            return Collections.newSetFromMap(new java.util.IdentityHashMap<TExpression, Boolean>());
10396        }
10397        if (stmts == null || lineage == null || cteMapForExtraction == null) {
10398            throw new IllegalStateException(
10399                    "extractUncorrelatedPredicateSubqueriesFromClause("
10400                            + ctx.clauseLabel + ") activated without required state — "
10401                            + "stmts=" + (stmts == null ? "null" : "ok")
10402                            + " lineage=" + (lineage == null ? "null" : "ok")
10403                            + " cteMap=" + (cteMapForExtraction == null ? "null" : "ok")
10404                            + "; caller misconfiguration");
10405        }
10406        final Set<TExpression> extractedRoots =
10407                Collections.newSetFromMap(new java.util.IdentityHashMap<TExpression, Boolean>());
10408        // Slice 25 (impl-review M1-fix): explicit reject for
10409        // {@code logical_not_t} over a slice-25 IN / scalar-cmp /
10410        // ANY-ALL-SOME wrapper at the root. The slice-23/24
10411        // {@code logical_not_t} over {@code exists_t} (NOT EXISTS)
10412        // remains admitted by unwrapToInnerExtractableSubquery.
10413        if (isLogicalNotOverNonExistsWrapper(onCond)) {
10414            throw new SemanticIRBuildException(
10415                    Diagnostic.error(ctx.predicateNotNot,
10416                    "predicate subquery in " + ctx.clauseLabel + ": NOT applied to "
10417                            + "a non-EXISTS predicate subquery wrapper "
10418                            + "(" + onCond.getRightOperand().getExpressionType()
10419                            + ") is not supported yet — the slice-25 boundary "
10420                            + "admits NOT only over EXISTS; "
10421                            + "rewrite e.g. NOT (a IN (SELECT ...)) as "
10422                            + "a NOT IN (SELECT ...)", onCond));
10423        }
10424        // Root fast path: acceptChildren never visits the root node, so
10425        // a clause whose entire expression IS a wrapper would be missed
10426        // by the descendant walker.
10427        TExpression rootExtractable = unwrapToInnerExtractableSubquery(onCond);
10428        if (rootExtractable != null) {
10429            // M1-fix + slice-26 dual-side: validate the non-subquery
10430            // side of non-EXISTS wrappers BEFORE extracting (so partial
10431            // extraction never lands). Slice 25 carryover: subquery on
10432            // RHS → validate LHS via isAdmittedOuterLhsShape.
10433            // Slice 26 NEW: subquery on LHS (simple_comparison_t only)
10434            // → validate RHS via isAdmittedOuterRhsShape.
10435            if (onCond.getExpressionType() != EExpressionType.exists_t
10436                    && onCond.getExpressionType() != EExpressionType.logical_not_t) {
10437                boolean isLhsSubquery = (rootExtractable == onCond.getLeftOperand());
10438                boolean nonSubquerySideOk = isLhsSubquery
10439                        ? isAdmittedOuterRhsShape(onCond.getRightOperand())
10440                        : isAdmittedOuterLhsShape(onCond.getLeftOperand());
10441                if (!nonSubquerySideOk) {
10442                    throw new SemanticIRBuildException(
10443                            Diagnostic.error(ctx.outerShapeRejected,
10444                            buildOuterShapeRejectionMessage(onCond, isLhsSubquery, ctx),
10445                            onCond));
10446                }
10447            }
10448            extractOnePredicateSubqueryBody(rootExtractable, provider, stmts, lineage,
10449                    cteMapForExtraction, ctx, correlationScope);
10450            extractedRoots.add(rootExtractable);
10451        }
10452        // Descendant walk: find every wrapper at any depth. Skip into
10453        // already-extracted subtrees (so we don't re-enter the body
10454        // looking for nested wrappers — covered by the inner-shape
10455        // preflight's "no nested predicate subqueries in body"
10456        // rejection).
10457        onCond.acceptChildren(new TParseTreeVisitor() {
10458            // Track depth into already-extracted roots and into wrapper
10459            // subtrees we've extracted. preVisit increments on the
10460            // wrapper (the parent that contained the inner extractable);
10461            // postVisit decrements on either the inner extractable
10462            // (extractedRoots.contains) or the wrapper
10463            // (unwrapToInnerExtractableSubquery != null). The
10464            // {@code skipDepth > 0} guard prevents the second
10465            // decrement from going negative when both apply (e.g. NOT
10466            // EXISTS — both the logical_not_t wrapper and the inner
10467            // exists_t fire).
10468            int skipDepth = 0;
10469
10470            @Override
10471            public void preVisit(TExpression e) {
10472                if (skipDepth > 0) return;
10473                if (extractedRoots.contains(e)) {
10474                    // Already-extracted inner being re-visited shouldn't
10475                    // happen in normal traversal but defensive guard
10476                    // avoids double-extraction if it ever did.
10477                    skipDepth++;
10478                    return;
10479                }
10480                // Slice 25 (impl-review M1-fix): explicit reject for
10481                // {@code logical_not_t} over a slice-25 wrapper at any
10482                // depth. Without this, the visitor would descend into
10483                // the wrapper child and silently extract — admitting
10484                // a shape (`NOT (a IN (SELECT ...))`) that the
10485                // slice-25 boundary does NOT admit.
10486                if (isLogicalNotOverNonExistsWrapper(e)) {
10487                    throw new SemanticIRBuildException(
10488                            Diagnostic.error(ctx.predicateNotNot,
10489                            "predicate subquery in " + ctx.clauseLabel + ": NOT applied to "
10490                                    + "a non-EXISTS predicate subquery wrapper "
10491                                    + "(" + e.getRightOperand().getExpressionType()
10492                                    + ") is not supported yet — the slice-25 "
10493                                    + "boundary admits NOT only over EXISTS; "
10494                                    + "rewrite e.g. NOT (a IN (SELECT ...)) as "
10495                                    + "a NOT IN (SELECT ...)", e));
10496                }
10497                TExpression toExtract = unwrapToInnerExtractableSubquery(e);
10498                if (toExtract != null) {
10499                    // M1-fix + slice-26 dual-side: validate the non-
10500                    // subquery side BEFORE extracting (so partial
10501                    // extraction never lands). The slice-23/24
10502                    // NOT-EXISTS path uses logical_not_t, which has no
10503                    // outer-shape gate.
10504                    if (e.getExpressionType() != EExpressionType.exists_t
10505                            && e.getExpressionType() != EExpressionType.logical_not_t) {
10506                        boolean isLhsSubquery = (toExtract == e.getLeftOperand());
10507                        boolean nonSubquerySideOk = isLhsSubquery
10508                                ? isAdmittedOuterRhsShape(e.getRightOperand())
10509                                : isAdmittedOuterLhsShape(e.getLeftOperand());
10510                        if (!nonSubquerySideOk) {
10511                            throw new SemanticIRBuildException(
10512                                    Diagnostic.error(ctx.outerShapeRejected,
10513                                    buildOuterShapeRejectionMessage(e, isLhsSubquery, ctx),
10514                                    e));
10515                        }
10516                    }
10517                    if (extractedRoots.contains(toExtract)) return;
10518                    extractOnePredicateSubqueryBody(toExtract, provider, stmts, lineage,
10519                            cteMapForExtraction, ctx, correlationScope);
10520                    extractedRoots.add(toExtract);
10521                    skipDepth++;
10522                }
10523            }
10524
10525            @Override
10526            public void postVisit(TExpression e) {
10527                // M2-fix: decrement on EITHER the extracted inner
10528                // (extractedRoots.contains) OR the wrapper
10529                // (unwrapToInnerExtractableSubquery != null). The
10530                // {@code skipDepth > 0} guard prevents going negative
10531                // when both apply.
10532                if (skipDepth > 0
10533                        && (extractedRoots.contains(e)
10534                                || unwrapToInnerExtractableSubquery(e) != null)) {
10535                    skipDepth--;
10536                }
10537            }
10538        });
10539        return extractedRoots;
10540    }
10541
10542    /**
10543     * Slice 25 (rename of slice-23 {@code extractOneExistsBody}):
10544     * extract a single predicate-subquery body's inner SELECT as its
10545     * own {@code <predicate_subquery_<i>>} StatementGraph. Runs the
10546     * inner-shape preflight before recursive build, then post-build
10547     * correlation check.
10548     *
10549     * <p>{@code extractableNode} is either an {@code exists_t}
10550     * (slice-23 EXISTS / slice-24 column-bearing EXISTS) or a
10551     * {@code subquery_t} (slice-25 IN-SELECT / scalar comparison /
10552     * ANY-ALL-SOME). Both expose the inner SELECT via
10553     * {@link TExpression#getSubQuery()}.
10554     */
10555    private static void extractOnePredicateSubqueryBody(TExpression extractableNode,
10556                                                        NameBindingProvider provider,
10557                                                        List<StatementGraph> stmts,
10558                                                        List<LineageEdge> lineage,
10559                                                        Map<String, Integer> cteMapForExtraction,
10560                                                        PredicateClauseContext ctx,
10561                                                        EnclosingScope correlationScope) {
10562        TSelectSqlStatement inner = extractableNode.getSubQuery();
10563        if (inner == null) {
10564            // Degenerate node with no subquery; defensive.
10565            throw new SemanticIRBuildException(
10566                    Diagnostic.error(ctx.existsBodyMissing,
10567                    "EXISTS in " + ctx.clauseLabel + ": subquery body is missing", null));
10568        }
10569        // (a–g) Inner-shape preflight (slice-23 boundary; slice 24 widens
10570        // (e) to admit single column-ref projection in addition to constant).
10571        preflightExistsInnerShape(inner);
10572
10573        // Slice 118 — when an enclosing correlation scope is supplied
10574        // (MERGE per-WHEN action WHERE only), decorate `provider` with
10575        // tolerant outer binding so the inner build admits qualified refs
10576        // to outer aliases (target / USING source / outer CTEs) as
10577        // synthetic EXACT_MATCH bindings instead of rejecting them as
10578        // COLUMN_BINDING_NON_EXACT. Mirrors the slice-117 pattern for
10579        // UPDATE SET-RHS correlated scalars. Computed BEFORE
10580        // buildSelectStatementImpl so the inner build's bindColumn calls
10581        // see the tolerant fallback already populated (codex round-5
10582        // ordering fix from slice 117). Qualifiers IN the inner's local
10583        // FROM aliases still strict-reject so real typos (`o.bad_col`
10584        // where `o` IS the inner FROM alias) still surface as
10585        // COLUMN_BINDING_NON_EXACT.
10586        final NameBindingProvider effectiveProvider;
10587        if (correlationScope != null) {
10588            Set<String> innerLocalAliasesForTolerant =
10589                    precomputeInnerLocalAliases(inner);
10590            effectiveProvider = innerLocalAliasesForTolerant.isEmpty()
10591                    ? provider
10592                    : provider.withTolerantOuterBinding(
10593                            innerLocalAliasesForTolerant);
10594        } else {
10595            effectiveProvider = provider;
10596        }
10597
10598        // Build the inner SELECT as its own StatementGraph. SAME provider
10599        // as outer (codex round-1 MUST 3 — outer CTEs remain visible).
10600        // Slice 118: tolerant-decorated provider when correlationScope
10601        // != null (MERGE per-WHEN action WHERE only); same provider as
10602        // before otherwise.
10603        // hasOuterCteListAlreadyProcessed=false (codex round-2 SHOULD 1 —
10604        // generic nested-WITH guard remains active as belt-and-braces).
10605        // allowFromSubqueries=false (no FROM-subqueries in inner body for
10606        // slice 23). isPredicateBody=true: for constant-only inner emits one
10607        // synthetic OutputColumn (slice-23 path); for column-ref inner the
10608        // §4.1.2 short-circuit falls through to the normal column-ref path
10609        // (slice-24 widening).
10610        String predName = PREDICATE_BODY_PREFIX + stmts.size() + ">";
10611        StatementGraph innerStmt = buildSelectStatementImpl(inner, effectiveProvider, predName,
10612                /*hasOuterCteListAlreadyProcessed=*/ false,
10613                /*allowFromSubqueries=*/ false,
10614                /*allowScalarProjectionSubqueries=*/ false,
10615                /*allowWindowProjection=*/ false,
10616                /*allowJoinOnPredicateSubqueries=*/ false,
10617                /*stmtsForExtraction=*/ null,
10618                /*lineageForExtraction=*/ null,
10619                /*cteMapForExtraction=*/ null,
10620                /*isPredicateBody=*/ true,
10621                /*whereClauseContext=*/ PredicateClauseContext.SELECT_WHERE,
10622                /*allowWherePredicateSubqueries=*/ false);
10623
10624        // Slice 24 (codex impl-review SHOULD 1): defensive relation-kind
10625        // walk. The preflight rejects FROM-subqueries; the post-build
10626        // correlation check below rejects OUTER_REFERENCE relations
10627        // (synthesised by promoteCorrelatedRefsToOuterReference for
10628        // outer refs we don't see). Belt-and-braces: the predicate body
10629        // must contain ONLY TABLE or CTE-bound relations. SUBQUERY /
10630        // OUTER_REFERENCE / UNION leaking through here would mean the
10631        // emitLineageForStatement call below routes through code paths
10632        // (e.g. the SUBQUERY-alias map) that we deliberately pass empty,
10633        // producing a SemanticIRBuildException about an unregistered
10634        // alias. Failing fast here surfaces the architectural violation
10635        // with a slice-24-tuned message instead.
10636        for (RelationSource r : innerStmt.getRelations()) {
10637            RelationKind kind = r.getBinding().getKind();
10638            if (kind != RelationKind.TABLE && kind != RelationKind.CTE) {
10639                throw new SemanticIRBuildException(
10640                        Diagnostic.error(ctx.existsInnerRelationUnknown,
10641                        "EXISTS in " + ctx.clauseLabel + ": inner SELECT relation '"
10642                                + r.getAlias() + "' has unsupported binding kind "
10643                                + kind + "; only TABLE or CTE relations are admitted "
10644                                + "(slice 24 boundary)", null));
10645            }
10646        }
10647        // Post-build correlation check (codex round-1 MUST 2 + round-2 SHOULD 2).
10648        // Use the existing collectAllInnerRefs helper so clause coverage
10649        // stays in sync with promoteCorrelatedRefsToOuterReference.
10650        // Slice 24: collectAllInnerRefs includes OutputColumn.sources, so
10651        // a column-ref projection like `EXISTS (SELECT e.id FROM x)` where
10652        // `e` is the OUTER's alias trips the same correlation rejection —
10653        // no extra slice-24 code needed.
10654        //
10655        // Slice 118 — when correlationScope != null (MERGE per-WHEN action
10656        // WHERE only), instead of REJECTING outer-aliased refs we PROMOTE
10657        // them into synthesised OUTER_REFERENCE relations via
10658        // promoteCorrelatedRefsToOuterReference. Mirrors the slice-14 /
10659        // slice-117 pattern. Unknown outer aliases (not in target / USING
10660        // source / outer CTEs) still throw SCALAR_SUBQUERY_UNKNOWN_RELATION_ALIAS
10661        // (the promoter's existing boundary; diagnostic message says
10662        // "scalar subquery" — acceptable cosmetic limitation, slice 117
10663        // precedent). The slice-118 lift covers refs landing in
10664        // collectAllInnerRefs clauses (output sources, filter, join,
10665        // groupBy, having, orderBy, distinctOn); the FILTER and WITHIN
10666        // GROUP walks BELOW remain active so outer-aliased refs hidden
10667        // inside FILTER subtrees or PG fn.withinGroup.orderBy still
10668        // reject with SELECT_WHERE_EXISTS_CORRELATED_UNKNOWN_OUTER_ALIAS
10669        // (codex round-1 Q2 BLOCKING preserved this boundary).
10670        Set<String> innerLocalAliases = new HashSet<>();
10671        for (RelationSource r : innerStmt.getRelations()) {
10672            innerLocalAliases.add(r.getAlias().toLowerCase(Locale.ROOT));
10673        }
10674        if (correlationScope != null) {
10675            // Pass a descriptive "outerAlias" so the promoter's diagnostic
10676            // messages identify the MERGE predicate-body host context if
10677            // promotion fails on an unknown alias.
10678            innerStmt = promoteCorrelatedRefsToOuterReference(
10679                    innerStmt,
10680                    "<merge predicate subquery " + (stmts.size()) + ">",
10681                    correlationScope);
10682        } else {
10683            for (ColumnRef ref : collectAllInnerRefs(innerStmt)) {
10684                if (!innerLocalAliases.contains(ref.getRelationAlias().toLowerCase(Locale.ROOT))) {
10685                    throw new SemanticIRBuildException(
10686                            Diagnostic.error(ctx.existsCorrelatedUnknownOuterAlias,
10687                            "EXISTS in " + ctx.clauseLabel + ": correlated reference to outer alias '"
10688                                    + ref.getRelationAlias()
10689                                    + "' is not supported yet (slice 23 accepts uncorrelated EXISTS only)", null));
10690                }
10691            }
10692        }
10693        // Slice 28: projection-only FILTER-aware correlation walk. The
10694        // slice-28 source-skip in buildOutputColumns removes column refs
10695        // inside FILTER (WHERE ...) subtrees from OutputColumn.sources, so
10696        // a correlated FILTER ref in the inner projection (e.g.
10697        // `EXISTS (SELECT SUM(x.s) FILTER (WHERE e.region='EU') FROM x)`
10698        // where `e` is the outer alias) would slip past the loop above.
10699        // Existing collectAllInnerRefs continues to cover correlated
10700        // FILTER refs landing in inner WHERE / HAVING / GROUP BY / ORDER BY
10701        // / JOIN-ON because those clauses still collect via plain
10702        // collectColumnRefs which descends into FILTER subtrees.
10703        TResultColumnList rclForFilterWalk = inner.getResultColumnList();
10704        if (rclForFilterWalk != null) {
10705            for (int rci = 0; rci < rclForFilterWalk.size(); rci++) {
10706                TResultColumn rc = rclForFilterWalk.getResultColumn(rci);
10707                Set<TExpression> filterClauses = collectFilterClauses(rc);
10708                for (TExpression fclause : filterClauses) {
10709                    // Slice 118 — use effectiveProvider so under
10710                    // correlationScope != null, outer-aliased refs come
10711                    // back as synthetic EXACT_MATCH ColumnRefs (rather
10712                    // than throwing on tolerant fallback being absent).
10713                    // The alias-membership rejection below then fires
10714                    // for outer-aliased FILTER-inner refs, preserving
10715                    // the slice-118 boundary (codex round-1 Q2 BLOCKING
10716                    // fix: FILTER-inner correlation still rejects).
10717                    for (ColumnRef ref : collectColumnRefs(fclause, effectiveProvider)) {
10718                        if (!innerLocalAliases.contains(
10719                                ref.getRelationAlias().toLowerCase(Locale.ROOT))) {
10720                            throw new SemanticIRBuildException(
10721                                    Diagnostic.error(ctx.existsCorrelatedUnknownOuterAlias,
10722                                    "EXISTS in " + ctx.clauseLabel + ": correlated reference to outer alias '"
10723                                            + ref.getRelationAlias()
10724                                            + "' inside FILTER (WHERE ...) is not supported yet", null));
10725                        }
10726                    }
10727                }
10728            }
10729        }
10730        // Slice 30: projection-only direct WITHIN GROUP ORDER BY correlation
10731        // walk. PostgreSQL attaches WITHIN GROUP to fn.withinGroup directly,
10732        // and TFunctionCall.acceptChildren does NOT descend into that field —
10733        // so collectColumnRefs (and therefore collectAllInnerRefs above) is
10734        // blind to outer references inside `fn.withinGroup.orderBy`. A
10735        // correlated reference like
10736        // `mode() WITHIN GROUP (ORDER BY e.region)` (where `e` is the
10737        // outer alias) would slip past the slice-23 correlation loop above.
10738        // Catch it explicitly with a per-result-column WG ORDER BY scan.
10739        // Mirrors the slice-28 FILTER walk pattern. Also closes the same
10740        // correlation gap retroactively for slice-29-admitted aggregates
10741        // (LISTAGG / STRING_AGG / GROUP_CONCAT / ARRAY_AGG WITHIN GROUP),
10742        // see Slice30Test.pgCorrelatedListaggWithinGroupOrderByNowAlsoRejected.
10743        //
10744        // IMPORTANT: this walk uses a qualifier-only collector
10745        // ({@link #collectQualifierAliases}) instead of
10746        // {@link #collectColumnRefs}. Resolver2 also doesn't attach
10747        // ResolutionResult to TObjectName nodes inside PG's direct
10748        // fn.withinGroup field (it shares the AST asymmetry that lets
10749        // slice 29 admit these without a source-skip). Going through
10750        // {@code collectColumnRefs} → {@code provider.bindColumn} would
10751        // throw {@code non-exact column bindings} on legitimate
10752        // non-correlated refs (status=NOT_FOUND because Resolver2 skipped
10753        // them). The qualifier-only collector reads the qualifier alias
10754        // straight off the TObjectName, which matches slice-23's
10755        // correlation invariant: only qualified refs that name an outer
10756        // alias are caught — unqualified refs remain a documented
10757        // schema-less limitation.
10758        TResultColumnList rclForWgWalk = inner.getResultColumnList();
10759        if (rclForWgWalk != null) {
10760            for (int rci = 0; rci < rclForWgWalk.size(); rci++) {
10761                TResultColumn rc = rclForWgWalk.getResultColumn(rci);
10762                Set<TOrderBy> wgOrderBys = collectDirectWithinGroupOrderBys(rc);
10763                for (TOrderBy wgOrderBy : wgOrderBys) {
10764                    for (String alias : collectQualifierAliases(wgOrderBy)) {
10765                        if (!innerLocalAliases.contains(
10766                                alias.toLowerCase(Locale.ROOT))) {
10767                            throw new SemanticIRBuildException(
10768                                    Diagnostic.error(ctx.existsCorrelatedUnknownOuterAlias,
10769                                    "EXISTS in " + ctx.clauseLabel + ": correlated reference to outer alias '"
10770                                            + alias
10771                                            + "' inside WITHIN GROUP (ORDER BY ...) is not supported yet", null));
10772                        }
10773                    }
10774                }
10775            }
10776        }
10777        int idx = stmts.size();
10778        stmts.add(innerStmt);
10779        // Slice 24: emit lineage edges for the predicate body. For
10780        // constant-only inner (slice-23 carryover), the synthetic
10781        // OutputColumn has empty sources and emitLineageForStatement
10782        // emits zero edges — no shape change for slice 23. For
10783        // column-ref inner (slice 24), the real OutputColumn carries
10784        // one ColumnRef source pointing at the inner's local relation;
10785        // emitLineageForStatement emits a STATEMENT_OUTPUT → TABLE_COLUMN
10786        // edge (TABLE-bound inner) or STATEMENT_OUTPUT → STATEMENT_OUTPUT
10787        // edge (CTE-bound inner) that the projector's slice-24 pass uses
10788        // to resolve the JOIN canonical edge.
10789        //
10790        // SUBQUERY map is empty: inner-shape preflight rejects FROM-subqueries.
10791        // ScalarInfo map is empty: inner-shape preflight rejects scalar
10792        // projections (and column-ref projection is single-source, not a
10793        // scalar-subquery extraction).
10794        // Slice 118 — pass the enclosing scope's flattened SUBQUERY-alias
10795        // map under correlation mode so OUTER_REFERENCE-of-SUBQUERY refs
10796        // resolve to the enclosing MERGE's USING-subquery statement index
10797        // for cross-stmt lineage emission (mirrors slice 117 UPDATE-side
10798        // emit dispatch).
10799        Map<String, Integer> subqueryAliasMap = (correlationScope != null)
10800                ? correlationScope.flattenSubqueryAliasToIndex()
10801                : Collections.<String, Integer>emptyMap();
10802        emitLineageForStatement(innerStmt, idx, lineage,
10803                cteMapForExtraction,
10804                subqueryAliasMap,
10805                Collections.<Integer, ScalarInfo>emptyMap());
10806        // The predicate body remains UNREACHABLE from outer: no relation
10807        // in outer points at it, and no STATEMENT_OUTPUT lineage edge has
10808        // it as its `to`. Inner WHERE / inner JOIN refs of the predicate
10809        // body therefore cannot enter outer's row-influence walker. The
10810        // slice-24 projector pass iterates predicate bodies directly via
10811        // `isPredicateSubquerySyntheticName` to emit JOIN canonical edges
10812        // from their OutputColumn sources only (slice-24 §4.2.1).
10813    }
10814
10815    /**
10816     * Slice 23: inner-shape preflight for an extracted EXISTS body. See
10817     * roadmap §14.25 (slice-23 plan §4.4) for the full reasoning.
10818     */
10819    private static void preflightExistsInnerShape(TSelectSqlStatement inner) {
10820        // (a) No set-op
10821        if (inner.getSetOperatorType() != null
10822                && inner.getSetOperatorType() != ESetOperatorType.none) {
10823            throw new SemanticIRBuildException(
10824                    Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_IS_SET_OP,
10825                    "EXISTS in JOIN ON: inner SELECT may not be a set operation", inner));
10826        }
10827        // (b) No nested CTE list
10828        if (inner.getCteList() != null && inner.getCteList().size() > 0) {
10829            throw new SemanticIRBuildException(
10830                    Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_WITH,
10831                    "EXISTS in JOIN ON: inner SELECT may not have its own WITH clause", inner));
10832        }
10833        // (c) No row-limit (delegated to rejectUnsupportedShape's row-limit
10834        //     guards which fire during buildSelectStatement). For an early,
10835        //     specific message we also fast-fail here.
10836        if (inner.getLimitClause() != null
10837                || inner.getTopClause() != null
10838                || inner.getFetchFirstClause() != null
10839                || inner.getOffsetClause() != null) {
10840            throw new SemanticIRBuildException(
10841                    Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_ROW_LIMIT,
10842                    "EXISTS in JOIN ON: inner SELECT may not have a row-limit clause", inner));
10843        }
10844        // (d) Inner FROM is required (codex round-2 MUST 2).
10845        if (inner.joins == null || inner.joins.size() == 0) {
10846            throw new SemanticIRBuildException(
10847                    Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_MISSING_FROM,
10848                    "EXISTS in JOIN ON: inner SELECT must have a FROM clause "
10849                            + "(degenerate EXISTS (SELECT 1) is not in scope)", inner));
10850        }
10851        // Slice 62 (codex plan-review round 1, P2 #1): predicate bodies
10852        // are built with allowFromSubqueries=false, so the gated reject
10853        // inside buildRelations also fires; we surface a slice-23
10854        // tuned message here so callers see the predicate-body shape
10855        // diagnostic before the generic comma-FROM message.
10856        if (inner.joins.size() > 1) {
10857            throw new SemanticIRBuildException(
10858                    Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_COMMA_FROM,
10859                    "EXISTS in JOIN ON: comma-separated FROM list "
10860                            + "(implicit cross join) in inner SELECT is not supported yet", inner));
10861        }
10862        // Slice 63: predicate body must not contain explicit CROSS JOIN
10863        // either. Surfaces a predicate-body-tuned diagnostic before the
10864        // gated reject inside buildRelations would fire with the
10865        // generic "scalar / set-op-branch / set-op-CTE / predicate"
10866        // message. The same shared preflight is used by EXISTS /
10867        // IN-SELECT / cmp-subquery / ANY-ALL-SOME wrappers.
10868        // Slice 64: same treatment for JOIN ... USING.
10869        for (TJoin j : inner.joins) {
10870            TJoinItemList items = j.getJoinItems();
10871            if (items == null) continue;
10872            for (int i = 0; i < items.size(); i++) {
10873                TJoinItem item = items.getJoinItem(i);
10874                if (item == null) continue;
10875                if (item.getJoinType() == EJoinType.cross) {
10876                    throw new SemanticIRBuildException(
10877                            Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_CROSS_JOIN,
10878                            "EXISTS in JOIN ON: CROSS JOIN in inner SELECT "
10879                                    + "is not supported yet", null));
10880                }
10881                if (item.getUsingColumns() != null
10882                        && item.getUsingColumns().size() > 0) {
10883                    throw new SemanticIRBuildException(
10884                            Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_USING,
10885                            "EXISTS in JOIN ON: JOIN ... USING (...) in inner "
10886                                    + "SELECT is not supported yet", null));
10887                }
10888                // Slice 66: NATURAL JOIN inside an EXISTS-style predicate
10889                // body is rejected with an EXISTS-tuned diagnostic. The
10890                // gated reject inside buildRelations would fire later
10891                // with the generic body-context message; surfacing here
10892                // gives users an EXISTS / IN-SELECT / cmp-subquery
10893                // friendly error.
10894                if (isNaturalJoinType(item.getJoinType())) {
10895                    throw new SemanticIRBuildException(
10896                            Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_NATURAL,
10897                            "EXISTS in JOIN ON: NATURAL JOIN in inner SELECT "
10898                                    + "is not supported yet", null));
10899                }
10900            }
10901        }
10902        // (d') No FROM-subquery on inner FROM/JOIN list. The recursive build
10903        //     passes allowFromSubqueries=false, so buildRelation would also
10904        //     reject; we surface a slice-23 specific message here.
10905        for (TJoin j : inner.joins) {
10906            if (j.getTable() != null
10907                    && j.getTable().getTableType() == gudusoft.gsqlparser.ETableSource.subquery) {
10908                throw new SemanticIRBuildException(
10909                        Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_FROM_SUBQUERY,
10910                        "EXISTS in JOIN ON: FROM-clause subquery in inner SELECT is not supported yet", null));
10911            }
10912            TJoinItemList items = j.getJoinItems();
10913            if (items == null) continue;
10914            for (int i = 0; i < items.size(); i++) {
10915                TTable r = items.getJoinItem(i).getTable();
10916                if (r != null && r.getTableType() == gudusoft.gsqlparser.ETableSource.subquery) {
10917                    throw new SemanticIRBuildException(
10918                            Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_FROM_SUBQUERY_ON_JOIN,
10919                            "EXISTS in JOIN ON: FROM-clause subquery on JOIN side in inner SELECT is not supported yet", null));
10920                }
10921            }
10922        }
10923        // (e) Result-column list: exactly one column projecting either a
10924        //     constant expression (slice 23), a single column reference
10925        //     (slice 24), an expression / function call / CASE /
10926        //     aggregate over inner columns (slice 27), an aggregate with
10927        //     FILTER (WHERE ...) over inner columns (slice 28), or — on
10928        //     PostgreSQL only — a whitelisted WITHIN GROUP aggregate
10929        //     (slice 29 admits LISTAGG / STRING_AGG / GROUP_CONCAT /
10930        //     ARRAY_AGG / count / sum / avg / min / max / stddev /
10931        //     variance family; slice 30 extends with `mode`).
10932        //     Multi-column / star / window function / scalar subquery /
10933        //     non-whitelisted WITHIN GROUP aggregate projections are
10934        //     rejected with shape-specific tuned messages — see
10935        //     {@link #findUnsupportedWithinGroupFunctionName} for the
10936        //     vendor + name gate.
10937        TResultColumnList rcl = inner.getResultColumnList();
10938        if (rcl == null || rcl.size() != 1) {
10939            throw new SemanticIRBuildException(
10940                    Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_COLUMN_COUNT,
10941                    "EXISTS in JOIN ON: inner SELECT must project exactly one column, got "
10942                            + (rcl == null ? 0 : rcl.size()), null));
10943        }
10944        TResultColumn rc0 = rcl.getResultColumn(0);
10945        if ("*".equals(rc0.getColumnNameOnly())) {
10946            throw new SemanticIRBuildException(
10947                    Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_NON_CONSTANT_PROJECTION,
10948                    "EXISTS in JOIN ON: inner SELECT must project a constant expression "
10949                            + "or a single column reference, got SELECT *", null));
10950        }
10951        TExpression projExpr = rc0.getExpr();
10952        if (projExpr == null || !isAdmittedPredicateProjection(projExpr)) {
10953            throw new SemanticIRBuildException(
10954                    Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_NON_CONSTANT_PROJECTION,
10955                    "EXISTS in JOIN ON: inner SELECT must project a constant expression "
10956                            + "(e.g. SELECT 1), a single column reference "
10957                            + "(e.g. SELECT x.id), an expression / function call / "
10958                            + "CASE / aggregate over inner columns (e.g. SELECT x.id + 1, "
10959                            + "UPPER(x.region), MAX(x.id), CASE WHEN ...), an aggregate "
10960                            + "with FILTER (WHERE ...) over inner columns "
10961                            + "(e.g. SUM(x.id) FILTER (WHERE x.region = 'EU')), or a "
10962                            + "WITHIN GROUP (ORDER BY ...) aggregate over inner columns "
10963                            + "(PostgreSQL admits the direct fn.withinGroup attachment; "
10964                            + "Oracle and SQL Server admit the windowDef.withinGroup "
10965                            + "attachment via slice 31 when no OVER clause is present "
10966                            + "— see TWindowDef.isIncludingOverClause(); slice 44 also "
10967                            + "admits Snowflake hypothetical-set ordered-set aggregates "
10968                            + "(rank / dense_rank / percent_rank / cume_dist) via direct "
10969                            + "fn.withinGroup attachment); DB2 / Snowflake LISTAGG / "
10970                            + "STRING_AGG WITHIN GROUP remain rejected pending a probe "
10971                            + "of their parser-specific argument storage; window "
10972                            + "functions (any OVER-bearing form) and scalar subqueries "
10973                            + "are not supported yet (slice 31 boundary)", null));
10974        }
10975        // Slice 29 / Slice 31: vendor-gated WITHIN GROUP rejecter.
10976        //
10977        // Two attachment styles, gated by vendor:
10978        //   * Direct attachment ({@code fn.getWithinGroup()}): PG admits
10979        //     because its visitor descent (TFunctionCall.acceptChildren)
10980        //     does NOT walk fn.withinGroup, leaving OutputColumn.sources
10981        //     populated exactly with the function's column-bearing args.
10982        //     Snowflake and DB2 use the same field but their parser-
10983        //     specific arg storage (DB2's stringExpr / separatorExpr for
10984        //     LISTAGG) may not be visitor-visible — silently-empty
10985        //     sources while dlineage walks fdd to the base column =
10986        //     manufactured IR_MISSING_DEPENDENCY divergence; rejected.
10987        //   * WindowDef attachment ({@code fn.getWindowDef().getWithinGroup()}
10988        //     with WITHIN-GROUP-only windowDef): Oracle / MSSQL admit via
10989        //     slice 31. The visitor DOES descend through
10990        //     {@code windowDef.withinGroup.orderBy}, so the slice-31
10991        //     source-skip in
10992        //     {@link #collectColumnRefsExcludingFilterAndWithinGroupClauses}
10993        //     keeps OutputColumn.sources from leaking the WITHIN GROUP
10994        //     ORDER BY column refs (probe Q1 / Q3 / Q4 / Q5 in
10995        //     {@code /tmp/probe31}).
10996        //
10997        // Probed: PG (Q1, Q5, Q6, Q9, Q10), Oracle (Q1-Q5 in
10998        // {@code /tmp/probe31}), MSSQL (Q11-Q12), SparkSQL (parser drops
10999        // WITHIN GROUP attachment, so containsAggregateWithWithinGroup
11000        // returns false and the lift applies).
11001        if (containsAggregateWithWithinGroup(projExpr)) {
11002            EDbVendor v = inner.dbvendor;
11003            // Slice 44 / 45: Snowflake admitted at this gate ONLY when
11004            // every WITHIN GROUP-bearing call in the inner projection
11005            // is an admitted Snowflake direct-attachment shape:
11006            //   * hypothetical-set (rank / dense_rank / percent_rank /
11007            //     cume_dist with fn.getWithinGroup()!=null and
11008            //     fn.getWindowDef()==null) — slice 44; or
11009            //   * mode() with the same direct-attachment shape — slice 45.
11010            // Snowflake LISTAGG / STRING_AGG / percentile_cont /
11011            // percentile_disc share the direct-attachment shape but
11012            // their parser-specific argument storage (stringExpr /
11013            // separatorExpr) and / or name-whitelist exclusion keep
11014            // the slice-31/44 rejection (see Slice44Test §C and
11015            // Slice45Test §C boundary tests).
11016            boolean snowflakeAdmittedShape = (v == EDbVendor.dbvsnowflake)
11017                    && allWithinGroupCallsAreAdmittedSnowflakeDirectAttachment(projExpr);
11018            if (v != EDbVendor.dbvpostgresql
11019                    && v != EDbVendor.dbvoracle
11020                    && v != EDbVendor.dbvmssql
11021                    && !snowflakeAdmittedShape) {
11022                throw new SemanticIRBuildException(
11023                        Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_WITHIN_GROUP_AGGREGATE,
11024                        "EXISTS in JOIN ON: WITHIN GROUP aggregate inner projection on "
11025                                + "vendor=" + v
11026                                + " is not supported yet — slice 31 admits PostgreSQL "
11027                                + "(direct fn.withinGroup attachment), Oracle, and "
11028                                + "SQL Server (windowDef.withinGroup attachment with "
11029                                + "isIncludingOverClause=false); slice 44 additionally "
11030                                + "admits Snowflake hypothetical-set ordered-set "
11031                                + "aggregates (rank / dense_rank / percent_rank / "
11032                                + "cume_dist) via direct fn.withinGroup attachment; "
11033                                + "slice 45 additionally admits Snowflake mode() via "
11034                                + "the same direct attachment; "
11035                                + "DB2 / Snowflake LISTAGG / STRING_AGG / "
11036                                + "percentile_cont / other direct-attachment vendors "
11037                                + "remain rejected pending a probe of their parser-"
11038                                + "specific argument storage", null));
11039            }
11040            // Codex impl-review round-3 MUST: name-whitelist guard. The PG
11041            // parser attaches WITHIN GROUP to generic `func_application`,
11042            // not only to whitelisted aggregate names. Without this check,
11043            // a non-whitelisted call like `foo(x.id) WITHIN GROUP (...)`
11044            // would slip through. Slice 31: same protection applies on
11045            // Oracle / MSSQL — the windowDef-attachment grammar admits
11046            // any function name (PERCENTILE_CONT, RANK, user-defined
11047            // foo); the name guard rejects them so the IR never sees a
11048            // shape whose canonical model is unverified.
11049            String unsupportedName = findUnsupportedWithinGroupFunctionName(
11050                    projExpr, inner.dbvendor);
11051            if (unsupportedName != null) {
11052                throw new SemanticIRBuildException(
11053                        Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_WITHIN_GROUP_NON_WHITELISTED,
11054                        "EXISTS in JOIN ON: WITHIN GROUP attached to non-whitelisted "
11055                                + "function '" + unsupportedName + "' is not supported yet — "
11056                                + "slice 31 admits whitelisted aggregates only "
11057                                + "(see SemanticIRBuilder.AGGREGATE_FUNCTION_NAMES); "
11058                                + "slice 43 additionally admits PostgreSQL hypothetical-set "
11059                                + "ordered-set aggregates (rank / dense_rank / percent_rank / "
11060                                + "cume_dist) via direct fn.getWithinGroup attachment", null));
11061            }
11062        }
11063        // (f) No subqueries in inner WHERE / inner JOIN ON / inner GROUP BY /
11064        //     inner HAVING / inner ORDER BY. Reuses the slice-11 helper
11065        //     style with a slice-23-specific message prefix.
11066        rejectSubqueriesInPredicateBodyClauses(inner);
11067        // (g) Window functions are caught by the rejecters that fire inside
11068        //     buildSelectStatement (rejectWindowFunctionInScope on WHERE /
11069        //     GROUP BY / HAVING / ORDER BY); the inner projection itself is
11070        //     a constant expression and cannot contain a window call.
11071    }
11072
11073    /**
11074     * Slice 27: true iff {@code e} is an admitted predicate-subquery inner
11075     * projection shape. Admits (in priority order):
11076     * <ul>
11077     *   <li>{@link EExpressionType#simple_object_name_t} — single column
11078     *       ref (slice 24 carryover); one JOIN canonical edge per inner-
11079     *       column lineage terminal.</li>
11080     *   <li>{@link #isConstantExpression}-shaped constant (slice-23
11081     *       carryover); zero canonical contribution.</li>
11082     *   <li>Slice 27 widenings via {@link #isAdmittedSlice27ShapeRoot}:
11083     *       expression / function call / CASE / aggregate over inner
11084     *       columns. Probes 27 / 27b confirmed dlineage's
11085     *       {@code fdr clause="on"} canonical model walks fdd to the
11086     *       underlying base columns identically to the IR's
11087     *       slice-24 predicate-body sweep — so canonical equivalence
11088     *       holds. Aggregate-over-constants (e.g. {@code COUNT(*)},
11089     *       {@code SUM(1)}) produce empty {@code OutputColumn.sources}
11090     *       and zero predicate-body JOIN edges; canonical-equivalent
11091     *       to the slice-23 constant projection.</li>
11092     * </ul>
11093     * Hard rejecters fire BEFORE the {@link #isAdmittedSlice27ShapeRoot}
11094     * admit-list to keep the surface tight:
11095     * <ul>
11096     *   <li>{@link #containsAnySubqueryExpression} — slice-23 invariant.</li>
11097     *   <li>{@link #containsWindowFunction} — slice-13 invariant.
11098     *       Slice 31 narrowed the rejecter via {@link #isWindowDefBearingFunction}
11099     *       so a WITHIN-GROUP-only windowDef (Oracle / MSSQL plain
11100     *       WITHIN GROUP attachment) is NOT classified as a window
11101     *       function. Real OVER-bearing windowDef shapes ({@code OVER ()},
11102     *       {@code OVER (PARTITION BY ...)}, KEEP DENSE_RANK) continue to
11103     *       fire the rejecter. The complementary slice-31 source-skip in
11104     *       {@link #collectColumnRefsExcludingFilterAndWithinGroupClauses}
11105     *       removes the WITHIN GROUP ORDER BY column refs from
11106     *       {@code OutputColumn.sources} on Oracle / MSSQL so the visitor's
11107     *       descent through {@code windowDef.withinGroup.orderBy} doesn't
11108     *       leak into projection sources.</li>
11109     *   <li>Slice 29 / 31's vendor-gated WITHIN GROUP rejecter at the
11110     *       {@link #preflightExistsInnerShape} call site — see that
11111     *       method's vendor gate. Slice 31 admits Oracle and MSSQL
11112     *       (windowDef.withinGroup attachment with
11113     *       {@code !isIncludingOverClause()}) alongside PostgreSQL.
11114     *       Snowflake and DB2 both attach {@code WITHIN GROUP} to the
11115     *       direct {@code fn.getWithinGroup()} field (same as PG), but
11116     *       their parser-specific argument storage may not be
11117     *       visitor-visible (DB2 stores LISTAGG args in
11118     *       {@code stringExpr} / {@code separatorExpr}, which
11119     *       {@code TFunctionCall.acceptChildren} does NOT walk); they
11120     *       remain rejected. SparkSQL silently drops the WITHIN GROUP
11121     *       attachment at parse time (both {@code fn.withinGroup} and
11122     *       {@code fn.windowDef} are null); after slice 29 SparkSQL
11123     *       admits the same shape as PG, parity-friendly per probe Q1
11124     *       SparkSQL.</li>
11125     * </ul>
11126     *
11127     * <p>Slice 28 lifted the prior {@code containsAggregateWithFilter}
11128     * rejecter; FILTER aggregates are now admitted, with the FILTER
11129     * predicate column refs excluded from {@code OutputColumn.sources}
11130     * globally via the FILTER-aware variant of {@link #collectColumnRefs}
11131     * used in {@link #buildOutputColumns}. See the slice-28 entry in
11132     * §14.5 of the unified roadmap and §B / §C of the slice history
11133     * archive for the load-bearing decision.
11134     *
11135     * <p>Slice 29 lifted the prior unconditional
11136     * {@code containsAggregateWithWithinGroup} rejecter and replaced it
11137     * with a vendor-gated rejecter at the
11138     * {@link #preflightExistsInnerShape} call site (Snowflake / DB2 /
11139     * other non-PostgreSQL vendors that use the direct
11140     * {@code fn.getWithinGroup()} attachment remain rejected). PG
11141     * attaches {@code WITHIN GROUP} to the direct
11142     * {@code fn.getWithinGroup()} field, and
11143     * {@code TFunctionCall.acceptChildren} does NOT descend into that
11144     * field, so {@link #collectColumnRefs} never picks up the ORDER BY
11145     * column refs — no source-skip is needed. dlineage probes Q1–Q10
11146     * confirmed canonical-model JOIN-on edges include only the aggregate's
11147     * primary argument across all four vendors (the WITHIN GROUP ORDER
11148     * BY ref appears as {@code fdr clauseType="orderby"} on PG only, and
11149     * {@code DlineageXmlProjector.projectColumn} follows fdd not fdr).
11150     * Slice 29 is restricted to whitelisted aggregates whose names
11151     * appear in {@link #AGGREGATE_FUNCTION_NAMES}. As of slice 30 the
11152     * whitelist is: {@code count}, {@code sum}, {@code avg}, {@code min},
11153     * {@code max}, {@code stddev}, {@code variance}, {@code var_samp},
11154     * {@code var_pop}, {@code stddev_samp}, {@code stddev_pop},
11155     * {@code listagg}, {@code string_agg}, {@code group_concat},
11156     * {@code array_agg}, {@code mode} (slice-30 addition — PG
11157     * ordered-set aggregate, gated for the WITHIN GROUP path only;
11158     * see {@code DlineageXmlProjector.ORDER_BY_WITHIN_GROUP_AGGREGATE_NAMES}). The predicate-body short-circuit's
11159     * {@code aggregate=true} branch fires for these regardless of
11160     * {@code OutputColumn.sources} content — column-bearing args
11161     * (e.g. {@code LISTAGG(x.id, ',')}) produce a synthesized
11162     * {@code OutputColumn} with {@code sources=[x.id]} that the slice-24
11163     * sweep walks to base-column terminals, while literal-only args
11164     * (e.g. {@code LISTAGG('hello', ',')}) produce {@code sources=[]}
11165     * with zero JOIN canonical edges — canonically equivalent to
11166     * slice-23's constant projection.
11167     *
11168     * <p>Functions NOT in the whitelist (which on PG includes
11169     * {@code percentile_cont}, {@code percentile_disc}, {@code rank},
11170     * {@code dense_rank}, {@code percent_rank}, {@code cume_dist}, plus
11171     * any user-defined function with a direct {@code fn.withinGroup}
11172     * attachment) remain rejected by the
11173     * {@link #findUnsupportedWithinGroupFunctionName} guard at the
11174     * {@link #preflightExistsInnerShape} call site. Slice 30 lifted
11175     * {@code mode} only — the one PG ordered-set aggregate with no
11176     * documented window form in any GSP-supported vendor. Lifting
11177     * {@code percentile_cont} / {@code percentile_disc} requires either
11178     * a vendor-scoped projector OR a structural discriminator strong
11179     * enough to distinguish the cross-vendor windowed forms (Redshift /
11180     * Vertica / BigQuery / Oracle / SQL Server emit
11181     * {@code PERCENTILE_CONT WITHIN GROUP OVER (...)} variants). Lifting
11182     * {@code rank}/{@code dense_rank}/{@code percent_rank}/{@code cume_dist}
11183     * requires distinguishing window form {@code RANK() OVER (ORDER BY)}
11184     * from hypothetical-set form {@code rank(0.5) WITHIN GROUP (ORDER BY)}
11185     * — dlineage XML for the two is structurally identical on PG. See
11186     * §14.6 of the unified roadmap.
11187     */
11188    private static boolean isAdmittedPredicateProjection(TExpression e) {
11189        if (e == null) return false;
11190        if (e.getExpressionType() == EExpressionType.simple_object_name_t) {
11191            return true;                                          // slice 24 (column ref)
11192        }
11193        if (isConstantExpression(e)) return true;                 // slice 23 (constant)
11194        // Slice 27: hard rejecters before admit-list. Slice 28 lifted the
11195        // FILTER rejecter; slice 29 replaced the unconditional WITHIN
11196        // GROUP rejecter with a vendor-gated rejecter at the
11197        // preflightExistsInnerShape call site (see slice-29 §3.2).
11198        if (containsAnySubqueryExpression(e)) return false;       // slice 23 invariant
11199        if (containsWindowFunction(e)) return false;              // slice 13 invariant
11200        return isAdmittedSlice27ShapeRoot(e);
11201    }
11202
11203    /**
11204     * Slice 29: detect a {@code WITHIN GROUP (ORDER BY ...)} attachment
11205     * on the direct {@code fn.getWithinGroup()} field anywhere in the
11206     * subtree. This is the PG / Snowflake / DB2 attachment style. Used
11207     * as a vendor-gated rejecter in {@link #preflightExistsInnerShape}:
11208     * non-admitted vendors with this attachment remain rejected because
11209     * their parser-specific argument storage may not be visitor-visible
11210     * (DB2's {@code LISTAGG} stores args in {@code stringExpr} /
11211     * {@code separatorExpr}, which the default
11212     * {@code TFunctionCall.acceptChildren} does NOT walk —
11213     * {@code OutputColumn.sources} would be silently empty while dlineage
11214     * walks fdd to the base column, manufacturing
11215     * {@code IR_MISSING_DEPENDENCY} divergence).
11216     *
11217     * <p>Slice 31: also detects WITHIN GROUP attached to
11218     * {@code fn.getWindowDef().getWithinGroup()} when the windowDef is
11219     * {@link #isWithinGroupOnlyWindowDef WITHIN-GROUP-only} — the
11220     * Oracle / MSSQL attachment style. Both attachments are routed
11221     * through {@link #hasWithinGroupAnyAttachment}.
11222     */
11223    private static boolean containsAggregateWithWithinGroup(TExpression e) {
11224        if (e == null) return false;
11225        final boolean[] found = {false};
11226        e.acceptChildren(new TParseTreeVisitor() {
11227            @Override
11228            public void preVisit(TFunctionCall fn) {
11229                if (found[0]) return;
11230                if (hasWithinGroupAnyAttachment(fn)) found[0] = true;
11231            }
11232        });
11233        if (!found[0] && e.getExpressionType() == EExpressionType.function_t) {
11234            TFunctionCall fn = e.getFunctionCall();
11235            if (hasWithinGroupAnyAttachment(fn)) found[0] = true;
11236        }
11237        return found[0];
11238    }
11239
11240    /**
11241     * Slice 31: shared predicate used by {@link #containsAggregateWithWithinGroup}
11242     * and {@link #findUnsupportedWithinGroupFunctionName}. Returns
11243     * {@code true} iff {@code fn} carries {@code WITHIN GROUP} via
11244     * either:
11245     * <ul>
     *   <li>direct {@code fn.getWithinGroup()} field (PG / Snowflake /
     *       DB2 parser style; the SparkSQL parser drops the WITHIN GROUP
     *       attachment at parse time, so it never matches here);</li>
11248     *   <li>{@code fn.getWindowDef().getWithinGroup()} when the
11249     *       windowDef is {@link #isWithinGroupOnlyWindowDef WITHIN-GROUP-only}
11250     *       (Oracle / MSSQL parser style).</li>
11251     * </ul>
11252     */
11253    private static boolean hasWithinGroupAnyAttachment(TFunctionCall fn) {
11254        if (fn == null) return false;
11255        if (fn.getWithinGroup() != null) return true;
11256        return isWithinGroupOnlyWindowDef(fn.getWindowDef());
11257    }
11258
11259    /**
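     * Slice 29 (codex impl-review round-3 MUST): walk the expression
     * subtree and return the function name (exactly as returned by
     * {@code fn.getFunctionName().toString()}; no case-normalization is
     * applied to the returned value) of the first-visited
     * {@code TFunctionCall} that carries WITHIN GROUP — via direct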
11263     * {@code fn.getWithinGroup()} (PG style) or via
11264     * {@code fn.getWindowDef().getWithinGroup()} when the windowDef is
11265     * {@link #isWithinGroupOnlyWindowDef WITHIN-GROUP-only} (Oracle /
11266     * MSSQL style; slice 31) — whose name is NOT in
11267     * {@link #AGGREGATE_FUNCTION_NAMES}. Returns {@code null} if every
11268     * WITHIN GROUP-bearing call uses a whitelisted aggregate name.
11269     * Used at the {@code preflightExistsInnerShape} call site to reject
11270     * {@code foo(x.id) WITHIN GROUP (...)}-shaped projections where
11271     * {@code foo} isn't an aggregate the IR knows how to model.
11272     *
11273     * <p>Slice 43: now takes the inner {@link EDbVendor} so the
11274     * {@link #isAdmittedWithinGroupName} delegate can apply the
11275     * PG-only hypothetical-set carve-out
11276     * ({@link #isDirectAttachmentHypotheticalSetCall}; widened to
11277     * Snowflake by slice 44).
11278     */
11279    private static String findUnsupportedWithinGroupFunctionName(
11280            TExpression e, final EDbVendor vendor) {
11281        if (e == null) return null;
11282        final String[] firstUnsupported = {null};
11283        e.acceptChildren(new TParseTreeVisitor() {
11284            @Override
11285            public void preVisit(TFunctionCall fn) {
11286                if (firstUnsupported[0] != null) return;
11287                if (!hasWithinGroupAnyAttachment(fn)) return;
11288                String name = fn.getFunctionName() == null
11289                        ? null : fn.getFunctionName().toString();
11290                if (isAdmittedWithinGroupName(fn, name, vendor)) return;
11291                firstUnsupported[0] = name == null ? "<unnamed>" : name;
11292            }
11293        });
11294        if (firstUnsupported[0] == null
11295                && e.getExpressionType() == EExpressionType.function_t) {
11296            TFunctionCall fn = e.getFunctionCall();
11297            if (hasWithinGroupAnyAttachment(fn)) {
11298                String name = fn.getFunctionName() == null
11299                        ? null : fn.getFunctionName().toString();
11300                if (!isAdmittedWithinGroupName(fn, name, vendor)) {
11301                    firstUnsupported[0] = name == null ? "<unnamed>" : name;
11302                }
11303            }
11304        }
11305        return firstUnsupported[0];
11306    }
11307
11308    /**
11309     * Slice 42 helper used by {@link #findUnsupportedWithinGroupFunctionName}.
11310     * Returns {@code true} iff {@code name} is in the regular
11311     * {@link #AGGREGATE_FUNCTION_NAMES} whitelist, OR — under the
11312     * AST-shape constraint
11313     * {@link #isHypotheticalSetWithinGroupCall} — in the slice-42
11314     * {@link #HYPOTHETICAL_SET_AGGREGATE_NAMES} whitelist (Oracle /
11315     * MSSQL windowDef-bearing attachment), OR — under the slice-43
11316     * AST-shape constraint
11317     * {@link #isDirectAttachmentHypotheticalSetCall} — in the same
11318     * hypothetical-set whitelist on PostgreSQL (slice 43) or Snowflake
11319     * (slice 44) via direct {@code fn.getWithinGroup()} attachment.
11320     *
11321     * <p>The shape constraints pin the carve-outs by parser flavor:
11322     * Oracle / MSSQL produce {@code fn.getWindowDef()!=null} with
11323     * {@code wd.getWithinGroup()!=null} and {@code !wd.isIncludingOverClause()};
11324     * PG produces {@code fn.getWithinGroup()!=null} with
11325     * {@code fn.getWindowDef()==null}. Slice 43 admits that direct-
11326     * attachment hypothetical-set carve-out for PostgreSQL; slice 44
11327     * widens the same probe-confirmed shape to Snowflake. DB2 and other
11328     * direct-attachment vendors remain outside this helper until their
11329     * AST / dlineage parity is explicitly probed and covered.
11330     */
11331    private static boolean isAdmittedWithinGroupName(
11332            TFunctionCall fn, String name, EDbVendor vendor) {
11333        if (name == null || name.isEmpty()) return false;
11334        String lower = name.toLowerCase(Locale.ROOT);
11335        if (AGGREGATE_FUNCTION_NAMES.contains(lower)) return true;
11336        if (isHypotheticalSetWithinGroupCall(fn)) return true;
11337        return isDirectAttachmentHypotheticalSetCall(fn, vendor);
11338    }
11339
11340    /**
11341     * Slice 43 / 44: true iff {@code fn} is a direct-attachment
11342     * hypothetical-set ordered-set aggregate call shape — {@code rank} /
11343     * {@code dense_rank} / {@code percent_rank} / {@code cume_dist} with
11344     * {@code fn.getWithinGroup()!=null} AND {@code fn.getWindowDef()==null},
11345     * AND {@code vendor} is in {PostgreSQL, Snowflake}.
11346     *
11347     * <p>Used as a name-whitelist exception inside
11348     * {@link #isAdmittedWithinGroupName} for predicate-body inner
11349     * projections only. Top-level admission is deliberately not granted:
11350     * top-level lifting requires a vendor-scoped projector override
11351     * (slice 43 introduces the API but defers the override to a future
11352     * slice because PG / Snowflake dlineage XML is structurally
11353     * indistinguishable between the WG and OVER forms — naive override
11354     * breaks {@code rank() OVER (ORDER BY x)} classification).
11355     *
11356     * <p>Vendor gate: PG (slice 43) and Snowflake (slice 44 — probe-
11357     * confirmed AST + dlineage XML byte-identical to PG for all
11358     * four hypothetical-set names). DB2 / Greenplum / Redshift parse-fail
11359     * on the syntax. Other direct-attachment vendors (e.g. SparkSQL drops
11360     * WITHIN GROUP attachment at parse time) remain rejected pending a
11361     * fresh probe.
11362     *
11363     * <p>Probe: {@code /tmp/probe43/Probe43.java} (slice 43) and
11364     * {@code probe44.Probe44Test} (slice 44, captured during slice-44
11365     * implementation) confirmed the AST predicate matches PG / Snowflake
11366     * hypothetical-set forms (and not the OVER form), and confirmed the
11367     * dlineage XML for {@code EXISTS (SELECT rank(0.5) WITHIN GROUP
11368     * (ORDER BY x.salary) FROM locations x)} contributes zero base-table
11369     * edges from the inner predicate body (literal arg + WG ORDER BY ref
11370     * via {@code clauseType="orderby"} fdr that the projector's
11371     * {@code clauseTypeToRole} does not map to FILTER/JOIN). Both
11372     * projectors therefore agree on zero predicate-body lineage edges
11373     * for the slice-43 / slice-44 shape — no projector change required.
11374     */
11375    private static boolean isDirectAttachmentHypotheticalSetCall(
11376            TFunctionCall fn, EDbVendor vendor) {
11377        if (fn == null) return false;
11378        if (vendor != EDbVendor.dbvpostgresql
11379                && vendor != EDbVendor.dbvsnowflake) return false;
11380        if (!isDirectAttachmentHypotheticalSetCallShape(fn)) return false;
11381        return true;
11382    }
11383
11384    /**
11385     * Slice 44: vendor-agnostic shape predicate for the direct-attachment
11386     * hypothetical-set call form ({@code fn.getWithinGroup()!=null} AND
11387     * {@code fn.getWindowDef()==null} AND function name in
11388     * {@link #HYPOTHETICAL_SET_AGGREGATE_NAMES}). Used together with
11389     * {@link #isDirectAttachmentModeCallShape} (slice 45) by
11390     * {@link #allWithinGroupCallsAreAdmittedSnowflakeDirectAttachment} to
11391     * gate Snowflake admission. Snowflake LISTAGG / STRING_AGG /
11392     * percentile_cont WITHIN GROUP share this attachment style but their
11393     * parser-specific argument storage ({@code stringExpr} /
11394     * {@code separatorExpr}) and dlineage XML parity remain unprobed
11395     * (slice-31 boundary preserved).
11396     */
11397    private static boolean isDirectAttachmentHypotheticalSetCallShape(
11398            TFunctionCall fn) {
11399        if (fn == null) return false;
11400        if (fn.getWithinGroup() == null) return false;
11401        if (fn.getWindowDef() != null) return false;
11402        if (fn.getFunctionName() == null) return false;
11403        String name = fn.getFunctionName().toString();
11404        if (name == null || name.isEmpty()) return false;
11405        return HYPOTHETICAL_SET_AGGREGATE_NAMES.contains(
11406                name.toLowerCase(Locale.ROOT));
11407    }
11408
11409    /**
11410     * Slice 45: vendor-agnostic shape predicate for the direct-attachment
11411     * {@code mode()} ordered-set aggregate call form
11412     * ({@code fn.getWithinGroup()!=null} AND
11413     * {@code fn.getWindowDef()==null} AND function name equals
11414     * {@code mode}). Parallel to
11415     * {@link #isDirectAttachmentHypotheticalSetCallShape}; used by
11416     * {@link #allWithinGroupCallsAreAdmittedSnowflakeDirectAttachment}
11417     * to admit Snowflake {@code mode() WITHIN GROUP (ORDER BY ...)}
11418     * predicate-body inner projections.
11419     *
11420     * <p>Probe-confirmed (see {@code /tmp/Probe45c.java} captured during
11421     * slice-45 implementation): Snowflake parses {@code mode() WITHIN
11422     * GROUP (ORDER BY x.salary)} with {@code fn.getWithinGroup() != null}
11423     * and {@code fn.getWindowDef() == null}, identical to PG. The
11424     * Snowflake dlineage XML for the predicate-body wrapper shape is
11425     * byte-equivalent to PG (same {@code resultset name="mode"
11426     * type="function"} wrapper, same {@code orderby} fdr that
11427     * {@code clauseTypeToRole} does not map to FILTER/JOIN); the
11428     * canonical model has zero predicate-body lineage edges, matching
11429     * the IR side (mode has no args, default visitor descent does not
11430     * walk direct {@code fn.withinGroup}).
11431     *
11432     * <p>Why mode is admitted but Snowflake LISTAGG / STRING_AGG /
11433     * percentile_cont aren't (slice-44/45 boundaries): mode has no
11434     * positional argument, so the OutputColumn.sources collection is
11435     * trivially empty and matches the dlineage zero-edge canonical model.
11436     * LISTAGG / STRING_AGG store args in parser-specific
11437     * {@code stringExpr} / {@code separatorExpr} fields whose visitor
11438     * descent has not been probed; admitting them risks
11439     * silently-empty IR sources against a non-empty dlineage column-arg
11440     * fdd. percentile_cont / percentile_disc use a literal arg
11441     * (slice-44 §C boundary preserved) but are not in
11442     * {@link #AGGREGATE_FUNCTION_NAMES}, so the slice-29 name-whitelist
11443     * guard fires inside {@link #findUnsupportedWithinGroupFunctionName}
11444     * and rejects regardless of vendor gate.
11445     */
11446    private static boolean isDirectAttachmentModeCallShape(
11447            TFunctionCall fn) {
11448        if (fn == null) return false;
11449        if (fn.getWithinGroup() == null) return false;
11450        if (fn.getWindowDef() != null) return false;
11451        if (fn.getFunctionName() == null) return false;
11452        String name = fn.getFunctionName().toString();
11453        if (name == null || name.isEmpty()) return false;
11454        return "mode".equals(name.toLowerCase(Locale.ROOT))
11455                && hasNoFunctionArgs(fn);
11456    }
11457
11458    private static boolean hasNoFunctionArgs(TFunctionCall fn) {
11459        return fn != null && (fn.getArgs() == null || fn.getArgs().size() == 0);
11460    }
11461
11462    /**
11463     * Slice 45 (renamed and widened from the slice-44 helper
11464     * {@code allWithinGroupCallsAreDirectAttachmentHypotheticalSet}):
11465     * returns {@code true} iff {@code e} contains at least one WITHIN
11466     * GROUP-bearing function call AND every such call uses an
11467     * <i>admitted</i> Snowflake direct-attachment shape — either
11468     * hypothetical-set ({@link #isDirectAttachmentHypotheticalSetCallShape},
11469     * slice 44) or mode ({@link #isDirectAttachmentModeCallShape},
11470     * slice 45). Used to gate the predicate-body vendor whitelist widen
11471     * at the {@code preflightExistsInnerShape} call site so Snowflake is
11472     * admitted only on these probe-confirmed shapes — Snowflake LISTAGG /
11473     * STRING_AGG / percentile_cont / percentile_disc / other names
11474     * remain rejected (their parser-specific argument storage and
11475     * dlineage XML parity are unprobed; slice-31/44 boundary
11476     * preserved).
11477     *
11478     * <p>Mixed expressions (e.g. {@code mode() WG (...) || rank(0.5)
11479     * WG (...)} in a single predicate-body inner projection) are
11480     * admitted when every WG-bearing call is admitted-shape;
11481     * one non-admitted-shape call blocks the whole expression
11482     * (Slice45Test §D).
11483     */
11484    private static boolean allWithinGroupCallsAreAdmittedSnowflakeDirectAttachment(
11485            TExpression e) {
11486        if (e == null) return false;
11487        final boolean[] sawAny = {false};
11488        final boolean[] sawNonAdmitted = {false};
11489        e.acceptChildren(new TParseTreeVisitor() {
11490            @Override
11491            public void preVisit(TFunctionCall fn) {
11492                if (!hasWithinGroupAnyAttachment(fn)) return;
11493                sawAny[0] = true;
11494                if (!isDirectAttachmentHypotheticalSetCallShape(fn)
11495                        && !isDirectAttachmentModeCallShape(fn)) {
11496                    sawNonAdmitted[0] = true;
11497                }
11498            }
11499        });
11500        if (e.getExpressionType() == EExpressionType.function_t) {
11501            TFunctionCall fn = e.getFunctionCall();
11502            if (fn != null && hasWithinGroupAnyAttachment(fn)) {
11503                sawAny[0] = true;
11504                if (!isDirectAttachmentHypotheticalSetCallShape(fn)
11505                        && !isDirectAttachmentModeCallShape(fn)) {
11506                    sawNonAdmitted[0] = true;
11507                }
11508            }
11509        }
11510        return sawAny[0] && !sawNonAdmitted[0];
11511    }
11512
11513    /**
11514     * Slice 27: fail-closed enumeration of admitted projection root shapes
11515     * after the slice-23/24 fast paths and the hard-rejecter guards have
11516     * been considered by {@link #isAdmittedPredicateProjection}.
11517     * Open-ended type checks are intentionally avoided
11518     * (slice-history §C / codex round-1 SHOULD 5).
11519     *
11520     * <p>Admits:
11521     * <ul>
11522     *   <li>{@code function_t} — any function call (aggregate or scalar).
11523     *       OVER-bearing window functions are rejected by the caller's
11524     *       {@code containsWindowFunction} guard (slice 31 narrowed via
11525     *       {@link #isWindowDefBearingFunction} so WITHIN-GROUP-only
11526     *       windowDef passes; OVER-bearing forms still rejected).
11527     *       {@code FILTER (WHERE ...)} was admitted in slice 28 (with
11528     *       FILTER predicate refs excluded from {@code OutputColumn.sources}
11529     *       via {@link #collectColumnRefsExcludingFilterClauses} —
11530     *       slice 31 widens to
11531     *       {@link #collectColumnRefsExcludingFilterAndWithinGroupClauses}).
11532     *       PG-style direct {@code fn.withinGroup} attachment was admitted
11533     *       in slice 29 via the vendor-gated rejecter at the
11534     *       {@link #preflightExistsInnerShape} call site; slice 31 extends
11535     *       admission to Oracle / MSSQL windowDef-bearing WITHIN GROUP
11536     *       (Snowflake / DB2 / other direct-attachment vendors remain
11537     *       rejected pending probe).</li>
11538     *   <li>{@code case_t} — simple or searched CASE.</li>
11539     *   <li>Pure binary ({@link TExpression#isPureBinaryForDoParse}) —
11540     *       arithmetic, concat, comparison.</li>
11541     *   <li>{@code parenthesis_t} — descend.</li>
11542     *   <li>{@code typecast_t} — PostgreSQL / Snowflake / Redshift
11543     *       {@code expr::TYPE} (slice 37; cross-vendor probe slice 38).
11544     *       Admit unconditionally; the slice-13 invariant rejecters
11545     *       ({@link #containsAnySubqueryExpression} /
11546     *       {@link #containsWindowFunction}) fire BEFORE this admit check
11547     *       inside {@link #isAdmittedPredicateProjection}, so
11548     *       {@code (SELECT 1)::INT} and {@code (ROW_NUMBER() OVER ())::INT}
11549     *       are still rejected. The default visitor descent walks
11550     *       {@code typecast_t.getLeftOperand()} so
11551     *       {@code OutputColumn.sources} populates with the underlying
11552     *       column refs (probe-verified for PG —
11553     *       {@code /tmp/probe37/Probe37.java}; slice 38 extended the probe
11554     *       to Snowflake and Redshift —
11555     *       {@code /tmp/probe38/Probe38.java}, {@code CheckCurrent.java} —
11556     *       and confirmed byte-identical AST + dlineage XML to PG for both
11557     *       {@code x.id::VARCHAR [AS lst]} and {@code LOWER(x.id)::VARCHAR}
11558     *       composed forms with zero divergence; slice 39 extended the probe
11559     *       to Greenplum, Vertica, GaussDB, Netezza —
11560     *       {@code /tmp/probe39/Probe39.java}, {@code Probe39b.java} — and
11561     *       confirmed AST + dlineage XML byte-identical to the PG / Snowflake /
11562     *       Redshift contract for both aliased and unaliased forms with zero
11563     *       divergence; slice 40 extended the probe to BigQuery, Trino, Presto,
11564     *       EDB, DuckDB, Databricks —
11565     *       {@code /tmp/probe40/Probe40.java}, {@code Probe40b.java},
11566     *       {@code Probe40c.java}, {@code Probe40d.java}, {@code Probe40e.java}
11567     *       — and confirmed AST + dlineage XML byte-identical to the
11568     *       PG / Snowflake / Redshift contract for aliased, unaliased, and
11569     *       {@code LOWER(x.id)::VARCHAR} composed forms with zero divergence;
11570     *       slice 41 closes out the residual vendor matrix —
11571     *       {@code /tmp/probe41/Probe41.java}, {@code Probe41b.java} —
11572     *       confirming Informix native {@code typecast_t} (AST + dlineage XML
11573     *       byte-identical to the PG / Snowflake / Redshift contract);
11574     *       ClickHouse parser auto-lowers {@code expr::TYPE} to
11575     *       {@code function_t} so the slice-27 admission applies;
11576     *       Sybase / Flink / Dameng parse-fail on {@code ::TYPE} but accept
11577     *       {@code CAST(x AS TYPE)} via {@code function_t} (slice-27
11578     *       carryover); Exasol / AzureSQL parse {@code expr::TYPE} as
11579     *       {@code simple_object_name_t} (vendor-quirk — the {@code ::} is
11580     *       interpreted as a qualified-name separator, mirroring T-SQL's
11581     *       {@code tablename::method()} schema-qualified syntax) so the
11582     *       slice-32 exclusion routes via normal column handling;
11583     *       OceanBase / Impala / StarRocks parse-fail boundary locked in).
11584     *       Oracle uses {@code CAST(x AS TYPE)} which parses as
11585     *       {@code function_t} (already admitted above), so no Oracle-specific
11586     *       {@code typecast_t} admission is needed; Hive / SparkSQL parse-fail
11587     *       on the {@code ::TYPE} syntax — slice 39 pins this boundary; slice
11588     *       40 extends the parse-fail boundary lock-in to DB2, Teradata, MySQL,
11589     *       and HANA so a future grammar lift fires loudly and re-probe is
11590     *       required before relying on zero-divergence for those dialects.
11591     *       The slice-37 admission remains structural (no vendor gate); future
11592     *       vendors that surface {@code typecast_t} will be admitted
11593     *       automatically — re-probe before relying on zero-divergence
11594     *       guarantees.</li>
11595     * </ul>
11596     * Implicitly rejects {@code list_t}, {@code subquery_t} (caught by the
11597     * caller), and any unknown expression type.
11598     */
11599    private static boolean isAdmittedSlice27ShapeRoot(TExpression e) {
11600        if (e == null) return false;
11601        EExpressionType t = e.getExpressionType();
11602        if (t == EExpressionType.function_t) return true;
11603        if (t == EExpressionType.case_t) return true;
11604        if (t == EExpressionType.parenthesis_t) {
11605            return e.getLeftOperand() != null
11606                    && isAdmittedSlice27ShapeRoot(e.getLeftOperand());
11607        }
11608        if (t == EExpressionType.typecast_t) return true;     // slice 37 (cross-vendor parity probed in slice 38; widened in slice 39 + slice 40; residual vendors locked in by slice 41 — Informix typecast_t)
11609        if (TExpression.isPureBinaryForDoParse(t)) return true;
11610        return false;
11611    }
11612
11613    /**
11614     * Slice 27: visitor-based deep window-function detector. Mirrors
11615     * {@link #rejectWindowFunctions} (line ~4530) but boolean-returning so
11616     * it can be used as a guard inside
11617     * {@link #isAdmittedPredicateProjection}.
11618     *
11619     * <p>Slice 31: discriminates WITHIN-GROUP-only windowDef shapes
11620     * (Oracle / MSSQL plain {@code WITHIN GROUP (ORDER BY ...)} attachment
11621     * without OVER) from OVER-bearing ones via
11622     * {@link #isWindowDefBearingFunction}. Plain WITHIN GROUP no longer
11623     * counts as a window function for predicate-body inner-projection
11624     * admission. NOTE: only this helper and {@link #isAggregateFunction}
11625     * are lifted — every other slice-13 invariant rejecter
11626     * ({@link #rejectHavingWindowFunction},
11627     * {@link #rejectOrderByWindowFunction},
11628     * {@link #rejectWindowFunctionInScope},
11629     * {@link #rejectWindowFunctions},
11630     * {@link #rejectEmbeddedWindowFunction},
11631     * {@link #isTopLevelWindowProjection}, and the OVER ORDER BY
11632     * window check inside {@code buildWindowOrderRefs}) keeps the
11633     * strict {@code wd != null} check unchanged so HAVING / ORDER BY /
11634     * WHERE / GROUP BY / JOIN ON / top-level projection contexts still
11635     * reject WITHIN-GROUP-only attachments — slice 31 boundary.
11636     */
11637    private static boolean containsWindowFunction(TExpression e) {
11638        if (e == null) return false;
11639        final boolean[] found = {false};
11640        e.acceptChildren(new TParseTreeVisitor() {
11641            @Override
11642            public void preVisit(TFunctionCall fn) {
11643                if (found[0]) return;
11644                if (isWindowDefBearingFunction(fn)) found[0] = true;
11645            }
11646        });
11647        if (!found[0] && e.getExpressionType() == EExpressionType.function_t) {
11648            TFunctionCall fn = e.getFunctionCall();
11649            if (isWindowDefBearingFunction(fn)) found[0] = true;
11650        }
11651        return found[0];
11652    }
11653
11654    /**
11655     * Slice 31: discriminate WITHIN-GROUP-only {@link TWindowDef} shapes
11656     * from OVER-bearing ones. Returns {@code true} iff:
11657     * <ul>
11658     *   <li>{@code wd.getWithinGroup() != null} — the discriminating
11659     *       attachment;</li>
11660     *   <li>{@code !wd.isIncludingOverClause()} — no OVER syntax of any
11661     *       kind (including empty {@code OVER ()});</li>
11662     *   <li>{@code wd.getKeepDenseRankClause() == null} — Oracle KEEP
11663     *       DENSE_RANK FIRST/LAST is a slice-22 deferred shape and must
11664     *       remain windowed.</li>
11665     * </ul>
11666     *
11667     * <p>Probe-validated: {@code TWindowDef.isIncludingOverClause()} is
11668     * {@code false} for plain {@code WITHIN GROUP} and {@code true} for
11669     * any OVER-bearing form (probe Q8 / Q9 / Q11 in
11670     * {@code /tmp/probe31}).
11671     *
11672     * <p>Used by {@link #isWindowDefBearingFunction} (the slice-13
11673     * invariant lift's discriminator).
11674     */
11675    private static boolean isWithinGroupOnlyWindowDef(TWindowDef wd) {
11676        if (wd == null) return false;
11677        if (wd.isIncludingOverClause()) return false;
11678        if (wd.getWithinGroup() == null) return false;
11679        if (wd.getKeepDenseRankClause() != null) return false;
11680        return true;
11681    }
11682
11683    /**
11684     * Slice 31: a {@link TFunctionCall} is an OVER-bearing window-def
11685     * function iff its {@code windowDef} is non-null AND not
11686     * {@link #isWithinGroupOnlyWindowDef WITHIN-GROUP-only}. Replaces
11687     * the historical {@code fn.getWindowDef() != null} check inside
11688     * {@link #containsWindowFunction} and {@link #isAggregateFunction};
11689     * every other rejecter retains the strict {@code wd != null} check
11690     * unchanged (slice 31 narrow lift).
11691     */
11692    private static boolean isWindowDefBearingFunction(TFunctionCall fn) {
11693        if (fn == null) return false;
11694        TWindowDef wd = fn.getWindowDef();
11695        if (wd == null) return false;
11696        return !isWithinGroupOnlyWindowDef(wd);
11697    }
11698
11699    /**
11700     * Slice 33: a {@link TFunctionCall} is an admitted top-level
11701     * WITHIN-GROUP-only aggregate iff:
11702     *
11703     * <ul>
11704     *   <li>{@link #isWithinGroupOnlyWindowDef} returns true on its
11705     *       windowDef (Oracle / MSSQL plain {@code WITHIN GROUP}, no
11706     *       OVER, no KEEP DENSE_RANK);</li>
11707     *   <li>{@code vendor} is Oracle or MSSQL — explicit gate mirroring
11708     *       the slice-31 predicate-body gate at line ~3860. PG /
11709     *       Snowflake / DB2 / SparkSQL produce direct
11710     *       {@code fn.withinGroup} attachment with {@code windowDef=null}
11711     *       and don't reach this helper today, but the explicit gate
11712     *       keeps the contract narrow against future parser changes;</li>
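     *   <li>The function name is in {@link #AGGREGATE_FUNCTION_NAMES}
     *       (LISTAGG / STRING_AGG / SUM / MIN / MAX / MODE / etc.) or,
     *       since slice 42, in {@code HYPOTHETICAL_SET_AGGREGATE_NAMES}
     *       (RANK / DENSE_RANK / PERCENT_RANK / CUME_DIST).</li>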
11715     * </ul>
11716     *
11717     * <p>Used only by {@link #buildOutputColumns} to fall through to the
11718     * normal aggregate path. The slice-13 invariant rejecters
11719     * ({@link #isTopLevelWindowProjection},
11720     * {@link #rejectWindowFunctions},
11721     * {@link #rejectEmbeddedWindowFunction},
11722     * {@link #rejectHavingWindowFunction},
11723     * {@link #rejectOrderByWindowFunction},
11724     * {@link #rejectWindowFunctionInScope})
11725     * keep the strict {@code wd != null} check unchanged; slice 33
11726     * admission is gated by a single boolean local to
11727     * {@code buildOutputColumns}.
11728     *
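     * <p>Names in neither set (PERCENTILE_CONT / PERCENTILE_DISC /
     * user-defined) keep routing to {@link #buildWindowOutputColumn}
     * where the {@link #WINDOW_FUNCTION_NAMES} guard rejects them as
     * "unsupported window function". Slice 42 admits the
     * hypothetical-set names (RANK / DENSE_RANK / PERCENT_RANK /
     * CUME_DIST) through {@code HYPOTHETICAL_SET_AGGREGATE_NAMES}, so
     * those no longer take that route when the WITHIN-GROUP-only
     * windowDef shape and the Oracle / MSSQL vendor gate both hold.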
11734     */
11735    private static boolean isAdmittedTopLevelWithinGroupAggregate(
11736            TFunctionCall fn, EDbVendor vendor) {
11737        if (fn == null) return false;
11738        if (!isWithinGroupOnlyWindowDef(fn.getWindowDef())) return false;
11739        if (vendor != EDbVendor.dbvoracle && vendor != EDbVendor.dbvmssql) {
11740            return false;
11741        }
11742        if (fn.getFunctionName() == null) return false;
11743        String name = fn.getFunctionName().toString();
11744        if (name == null || name.isEmpty()) return false;
11745        String lower = name.toLowerCase(Locale.ROOT);
11746        if (AGGREGATE_FUNCTION_NAMES.contains(lower)) return true;
11747        // Slice 42: hypothetical-set ordered-set aggregates (RANK /
11748        // DENSE_RANK / PERCENT_RANK / CUME_DIST) admitted on
11749        // Oracle / MSSQL with WITHIN-GROUP-only windowDef shape. The
11750        // surrounding {@code isWithinGroupOnlyWindowDef} guard above
11751        // already enforces the shape; the name-set membership here keeps
11752        // PERCENTILE_CONT / PERCENTILE_DISC / user-defined names rejected.
11753        return HYPOTHETICAL_SET_AGGREGATE_NAMES.contains(lower);
11754    }
11755
11756    /**
     * Slice 35 / 36 / 46 / 47: top-level direct-attachment WITHIN GROUP
     * aggregate.
11759     *
11760     * <p>PG stores {@code LISTAGG(... ) WITHIN GROUP (...)} (slice 35) and
11761     * {@code STRING_AGG(... ) WITHIN GROUP (...)} (slice 36) in
11762     * {@code fn.getWithinGroup()} with {@code windowDef=null}. Both bypass
11763     * the slice-33 Oracle/MSSQL helper above, but the plain aggregate path
11764     * is otherwise already correct: the root is not a window function,
11765     * {@link #isAggregateFunction} sees the whitelisted name, and default
11766     * visitor descent does not walk direct {@code fn.withinGroup}, so
11767     * sources contain the function argument but not the WITHIN GROUP ORDER
11768     * BY ref. This helper exists only to unlock the slice-34 expression-text
11769     * fallback for the unaliased top-level form.
11770     *
11771     * <p>Slice 36 widens the PG name whitelist from {@code {listagg}} to
11772     * {@code {listagg, string_agg}}. Snowflake / DB2 / SparkSQL
11773     * {@code LISTAGG} / {@code STRING_AGG} WG remain rejected because
11774     * their argument-storage shape (e.g. DB2's {@code stringExpr}/
11775     * {@code separatorExpr}) is not yet probed for visitor descent and
11776     * silent empty {@code OutputColumn.sources} would manufacture
11777     * {@code IR_MISSING_DEPENDENCY} divergence.
11778     *
11779     * <p>Slice 46 widens the vendor gate to additionally admit
11780     * Snowflake — but only for {@code mode}, the only Snowflake
11781     * direct-attachment WITHIN GROUP name whose dlineage XML has been
11782     * probe-confirmed byte-equivalent to PG (the projector's slice-30
11783     * vendor-agnostic {@code AGGREGATE_FUNCTION_NAMES} +
11784     * {@code ORDER_BY_WITHIN_GROUP_AGGREGATE_NAMES} entries already
11785     * cover {@code mode}, so no projector change is needed). Snowflake
11786     * {@code listagg} / {@code string_agg} / hypothetical-set names
11787     * stay out of slice-46 scope.
11788     *
11789     * <p>Slice 47 widens the PG name whitelist from
11790     * {@code {listagg, string_agg}} to {@code {listagg, string_agg, mode}},
11791     * the symmetrical lift to slice 46's Snowflake widen. The slice-46
11792     * pre-plan probe ({@code /tmp/Probe46Slice30.java}) already
11793     * confirmed PG aliased {@code mode()} WG was zero-divergence and the
11794     * unaliased form was blocked only by {@code effectiveOutputName}; PG
11795     * top-level {@code mode()} dlineage XML is byte-identical to
11796     * Snowflake's. No projector change is needed (slice 30 already
11797     * registered {@code mode}, vendor-agnostic).
11798     */
11799    private static boolean isAdmittedTopLevelDirectWithinGroupAggregate(
11800            TFunctionCall fn, EDbVendor vendor) {
11801        if (fn == null) return false;
11802        if (fn.getWithinGroup() == null) return false;
11803        if (fn.getWindowDef() != null) return false;
11804        if (fn.getFunctionName() == null) return false;
11805        String name = fn.getFunctionName().toString();
11806        if (name == null || name.isEmpty()) return false;
11807        String lower = name.toLowerCase(Locale.ROOT);
11808        if (vendor == EDbVendor.dbvpostgresql) {
11809            // Slice 35/36: PG LISTAGG / STRING_AGG WG.
11810            // Slice 47: PG mode() WG (parallels slice-46 Snowflake mode
11811            // lift; probed mode() carries no positional argument so
11812            // OutputColumn.sources is trivially empty matching
11813            // dlineage's zero-edge canonical model).
11814            return "listagg".equals(lower)
11815                    || "string_agg".equals(lower)
11816                    || ("mode".equals(lower) && hasNoFunctionArgs(fn));
11817        }
11818        if (vendor == EDbVendor.dbvsnowflake) {
11819            // Slice 46: Snowflake mode() WG only. The admitted shape is
11820            // constrained to the probed no-arg mode() form: OutputColumn.
11821            // sources is trivially empty (matching dlineage's zero-edge
11822            // canonical model), and dlineage XML was probe-confirmed
11823            // byte-identical to PG's mode() WG XML for both aliased and
11824            // unaliased forms.
11825            return "mode".equals(lower) && hasNoFunctionArgs(fn);
11826        }
11827        return false;
11828    }
11829    /**
11830     * Slice 27: detect {@code FILTER (WHERE ...)} on any function call in
11831     * the expression subtree. Probe Q5 (PostgreSQL) confirmed dlineage's
11832     * {@code fdr clause="on"} omits FILTER-predicate column refs while
11833     * {@link #collectColumnRefs} would include them — canonical-model
11834     * divergence. Reject as slice-27 boundary.
11835     */
11836    private static boolean containsAggregateWithFilter(TExpression e) {
11837        if (e == null) return false;
11838        final boolean[] found = {false};
11839        e.acceptChildren(new TParseTreeVisitor() {
11840            @Override
11841            public void preVisit(TFunctionCall fn) {
11842                if (found[0]) return;
11843                if (fn.getFilterClause() != null) found[0] = true;
11844            }
11845        });
11846        if (!found[0] && e.getExpressionType() == EExpressionType.function_t) {
11847            TFunctionCall fn = e.getFunctionCall();
11848            if (fn != null && fn.getFilterClause() != null) found[0] = true;
11849        }
11850        return found[0];
11851    }
11852
11853    /**
11854     * Slice 23: true iff every leaf of {@code e} is a {@code simple_constant_t}.
11855     * Admits {@code 1}, {@code 1+1}, {@code (1)}, {@code 'a' || 'b'} (vendor-
11856     * dependent). Slice 61 additionally admits unary {@code +}/{@code -}
11857     * wrappers over a constant operand so common signed literals like
11858     * {@code -1} and {@code -1.5} count as constants. Rejects column refs,
11859     * function calls (including {@code COALESCE}), CASE, scalar subqueries,
11860     * etc. The predicate body may STILL have inner WHERE / GROUP BY /
11861     * HAVING / ORDER BY referencing inner columns — only the projection
11862     * must be constant.
11863     */
11864    private static boolean isConstantExpression(TExpression e) {
11865        if (e == null) return false;
11866        EExpressionType t = e.getExpressionType();
11867        if (t == EExpressionType.simple_constant_t) return true;
11868        if (t == EExpressionType.parenthesis_t) {
11869            return e.getLeftOperand() != null && isConstantExpression(e.getLeftOperand());
11870        }
11871        // Slice 61: unary +/- over a constant operand. The Oracle parser
11872        // emits `-1` as {@code unary_minus_t} with {@code left=null} and
11873        // {@code right=simple_constant_t(1)}. Pre-slice-61 this fell
11874        // through and was rejected; slice 61 lifts it so signed literals
11875        // like `SELECT -1 FROM t` and `SELECT -1 UNION ALL SELECT -2`
11876        // round-trip through the constant-projection path.
11877        if (t == EExpressionType.unary_minus_t || t == EExpressionType.unary_plus_t) {
11878            TExpression operand = e.getRightOperand() != null
11879                    ? e.getRightOperand()
11880                    : e.getLeftOperand();
11881            return operand != null && isConstantExpression(operand);
11882        }
11883        // Pure binary ops (slice-22 isPureBinaryForDoParse helper) — both
11884        // operands must be constant. Concatenation, arithmetic, etc.
11885        if (TExpression.isPureBinaryForDoParse(t)) {
11886            TExpression l = e.getLeftOperand();
11887            TExpression r = e.getRightOperand();
11888            return l != null && isConstantExpression(l)
11889                    && r != null && isConstantExpression(r);
11890        }
11891        return false;
11892    }
11893
11894    /**
11895     * Slice 23: reject subqueries in an EXISTS body's inner WHERE / JOIN ON /
11896     * GROUP BY / HAVING / ORDER BY (would be predicate subqueries OR scalar
11897     * subqueries). Mirrors the slice-11
11898     * {@link #rejectSubqueriesInScalarBodyClauses} structure but uses a
11899     * slice-23-specific error message.
11900     */
11901    private static void rejectSubqueriesInPredicateBodyClauses(TSelectSqlStatement inner) {
11902        TWhereClause where = inner.getWhereClause();
11903        if (where != null && containsAnySubquery(where)) {
11904            throw new SemanticIRBuildException(
11905                    Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_SUBQUERY_IN_WHERE,
11906                    "EXISTS in JOIN ON: inner SELECT has a subquery in its WHERE clause; "
11907                            + "not supported yet", null));
11908        }
11909        if (inner.joins != null) {
11910            for (TJoin join : inner.joins) {
11911                TJoinItemList items = join.getJoinItems();
11912                if (items == null) continue;
11913                for (int i = 0; i < items.size(); i++) {
11914                    TJoinItem item = items.getJoinItem(i);
11915                    TExpression onCond = item == null ? null : item.getOnCondition();
11916                    if (onCond != null && containsAnySubqueryExpression(onCond)) {
11917                        throw new SemanticIRBuildException(
11918                                Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_SUBQUERY_IN_JOIN_ON,
11919                                "EXISTS in JOIN ON: inner SELECT has a subquery in a JOIN ON "
11920                                        + "clause; not supported yet", null));
11921                    }
11922                }
11923            }
11924        }
11925        TGroupBy groupBy = inner.getGroupByClause();
11926        if (groupBy != null) {
11927            TGroupByItemList items = groupBy.getItems();
11928            if (items != null && containsAnySubquery(items)) {
11929                throw new SemanticIRBuildException(
11930                        Diagnostic.error(DiagnosticCode.JOIN_ON_EXISTS_INNER_SUBQUERY_IN_GROUP_BY,
11931                        "EXISTS in JOIN ON: inner SELECT has a subquery in a GROUP BY clause; "
11932                                + "not supported yet", null));
11933            }
11934        }
11935        // HAVING / ORDER BY subqueries are caught by the slice-9 / 10
11936        // deep-scan rejecters that fire during the recursive build.
11937    }
11938
11939    /**
11940     * Slice 25 (impl-review S2-fix): walk {@code onCond} (root + every
11941     * descendant outside {@code extractedRoots}) and return the first
11942     * wrapper expression whose <b>left</b> operand is a
11943     * {@code subquery_t}. Returns null if none.
11944     */
11945    private static TExpression findSubqueryOnLeftWrapper(TExpression onCond,
11946                                                          final Set<TExpression> extractedRoots) {
11947        if (onCond == null) return null;
11948        if (isSubqueryOnLeftOfWrapper(onCond)) return onCond;
11949        final TExpression[] found = {null};
11950        onCond.acceptChildren(new TParseTreeVisitor() {
11951            int skipDepth = 0;
11952
11953            @Override
11954            public void preVisit(TExpression e) {
11955                if (found[0] != null) return;
11956                if (extractedRoots.contains(e)) {
11957                    skipDepth++;
11958                    return;
11959                }
11960                if (skipDepth > 0) return;
11961                if (isSubqueryOnLeftOfWrapper(e)) {
11962                    found[0] = e;
11963                }
11964            }
11965
11966            @Override
11967            public void postVisit(TExpression e) {
11968                if (extractedRoots.contains(e) && skipDepth > 0) {
11969                    skipDepth--;
11970                }
11971            }
11972        });
11973        return found[0];
11974    }
11975
11976    /**
11977     * Slice 25 / Slice 26: true iff {@code e} is a comparison/IN/
11978     * quantifier wrapper that the slice-25 / slice-26 surface still
11979     * rejects on subquery-positioning grounds.
11980     *
11981     * <p>Slice 26 narrowing: {@code simple_comparison_t} with subquery
11982     * on the LHS and a non-subquery RHS is now ADMITTED (returns
11983     * false here so the post-extraction rejecter doesn't fire). Other
11984     * shapes still fail:
11985     * <ul>
11986     *   <li>{@code in_t} with LHS=subquery: dlineage's
11987     *       {@code fdr clause="on"} sources omit the outer column for
11988     *       IN-LHS, so admitting on the IR side would manufacture
11989     *       canonical-model divergence (still rejected).</li>
11990     *   <li>{@code group_comparison_t} with LHS=subquery: borderline
11991     *       grammar; defensively rejected (slice-26 boundary).</li>
11992     *   <li>{@code simple_comparison_t} with subqueries on BOTH
11993     *       sides: would require dual extraction; deferred to a future
11994     *       slice (slice-26 boundary). Caller emits a tuned message.</li>
11995     * </ul>
11996     */
11997    private static boolean isSubqueryOnLeftOfWrapper(TExpression e) {
11998        if (e == null) return false;
11999        EExpressionType t = e.getExpressionType();
12000        TExpression l = e.getLeftOperand();
12001        TExpression r = e.getRightOperand();
12002        boolean lhsIsSubq = l != null && l.getExpressionType() == EExpressionType.subquery_t;
12003        boolean rhsIsSubq = r != null && r.getExpressionType() == EExpressionType.subquery_t;
12004        if (t == EExpressionType.in_t) {
12005            return lhsIsSubq;
12006        }
12007        if (t == EExpressionType.group_comparison_t) {
12008            return lhsIsSubq;
12009        }
12010        if (t == EExpressionType.simple_comparison_t) {
12011            // Slice 26: lifted UNLESS both sides are subqueries.
12012            return lhsIsSubq && rhsIsSubq;
12013        }
12014        return false;
12015    }
12016
12017    /**
12018     * Slice 26: true iff {@code e} is a {@code simple_comparison_t}
12019     * whose BOTH operands are {@code subquery_t}. Used by the
12020     * post-extraction rejecter to emit a slice-26-specific tuned
12021     * message distinguishing this case from the slice-25 LHS-subquery
12022     * shapes.
12023     */
12024    private static boolean isComparisonWithBothSubqueries(TExpression e) {
12025        if (e == null) return false;
12026        if (e.getExpressionType() != EExpressionType.simple_comparison_t) return false;
12027        TExpression l = e.getLeftOperand();
12028        TExpression r = e.getRightOperand();
12029        return l != null && l.getExpressionType() == EExpressionType.subquery_t
12030                && r != null && r.getExpressionType() == EExpressionType.subquery_t;
12031    }
12032
12033    /**
12034     * Slice 23: after extraction, any remaining subquery-bearing expression in
12035     * the JOIN-ON tree is an unsupported shape — EXISTS that failed
12036     * extraction (inner-shape rejection), correlated wrappers, subquery on
12037     * left side, etc. Catch them here with a tuned message before
12038     * {@link #collectColumnRefsSkipping} would otherwise descend into them
12039     * and bind their inner refs against the outer scope.
12040     *
12041     * <p>Slice 25 (impl-review S2-fix): subquery-on-LEFT cases get a
12042     * tuned message (uses the slice-25 outer-shape prefix
12043     * "predicate subquery in JOIN ON:") via
12044     * {@link #findSubqueryOnLeftWrapper}.
12045     *
12046     * <p>Slice 26: a NEW first pass (before the slice-25 LHS-subquery
12047     * pass) detects {@code simple_comparison_t} wrappers with
12048     * subqueries on BOTH sides via
12049     * {@link #findComparisonWithBothSubqueries} and emits a slice-26
12050     * tuned message. Both-sides shape satisfies
12051     * {@link #isSubqueryOnLeftOfWrapper} (which slice 26 narrowed to
12052     * {@code lhsIsSubq && rhsIsSubq} for {@code simple_comparison_t}),
12053     * so ordering matters — without the both-sides first pass, the
12054     * slice-25 LHS-subquery wording would fire first.
12055     */
12056    private static void rejectAnyRemainingSubqueriesInJoinOn(TExpression onCond,
12057                                                              final Set<TExpression> extractedRoots) {
12058        rejectAnyRemainingSubqueriesFromClause(onCond, extractedRoots,
12059                PredicateClauseContext.JOIN_ON);
12060    }
12061
12062    /**
12063     * Slice 110 — clause-agnostic remaining-subquery rejecter. Mirrors
12064     * the slice-26 logic in {@link #rejectAnyRemainingSubqueriesInJoinOn}
12065     * but uses {@code ctx.*} codes / labels so the same body powers JOIN-ON
12066     * (slice 26) and UPDATE WHERE (slice 110).
12067     */
12068    private static void rejectAnyRemainingSubqueriesFromClause(TExpression onCond,
12069                                                                final Set<TExpression> extractedRoots,
12070                                                                final PredicateClauseContext ctx) {
12071        if (onCond == null) return;
12072        // Slice 26: tuned message for a comparison with subqueries on
12073        // BOTH sides. Fires at root or any descent. Checked BEFORE the
12074        // slice-25 subquery-on-LEFT pass because both-subqueries
12075        // satisfies isSubqueryOnLeftOfWrapper too — without this
12076        // ordering the slice-25 wording would fire first.
12077        TExpression bothSubqueriesWrapper = findComparisonWithBothSubqueries(onCond,
12078                extractedRoots);
12079        if (bothSubqueriesWrapper != null) {
12080            throw new SemanticIRBuildException(
12081                    Diagnostic.error(ctx.scalarComparisonBothSides,
12082                    "predicate subquery in " + ctx.clauseLabel + ": scalar comparison with "
12083                            + "subqueries on both sides is not supported yet "
12084                            + "(slice 26 admits exactly one subquery side, with a "
12085                            + "single column reference on the other side; rewrite "
12086                            + "as a join across a derived table or a CTE)", null));
12087        }
12088        // Slice 25 (impl-review S2-fix): tuned message for subquery
12089        // on the LEFT side of a wrapper. Fires at root or any descent.
12090        // Slice 26 narrowed isSubqueryOnLeftOfWrapper:
12091        // simple_comparison_t with LHS=subquery and non-subquery RHS is
12092        // now ADMITTED, so this rejecter only fires for in_t-LHS-subq /
12093        // group_comparison_t-LHS-subq (still rejected as asymmetric /
12094        // borderline shapes).
12095        TExpression leftSubqueryWrapper = findSubqueryOnLeftWrapper(onCond, extractedRoots);
12096        if (leftSubqueryWrapper != null) {
12097            throw new SemanticIRBuildException(
12098                    Diagnostic.error(ctx.predicateSubqueryOnLeft,
12099                    "predicate subquery in " + ctx.clauseLabel + ": "
12100                            + leftSubqueryWrapper.getExpressionType()
12101                            + " wrapper has a subquery on the LEFT side "
12102                            + "(only RHS-subquery IN / ANY-ALL-SOME and "
12103                            + "either-side scalar comparison are admitted; "
12104                            + "rewrite to put the subquery on the right side, "
12105                            + "or rewrite as a join across a derived table)", null));
12106        }
12107        // Root check: if the entire condition IS an EXISTS root and it WASN'T
12108        // extracted (meaning extraction threw an exception, which should not
12109        // reach here, OR some other root subquery shape), reject. The root
12110        // walker would otherwise miss it.
12111        TExpression rootSubject = isExistsRoot(onCond) ? unwrapExistsRoot(onCond) : onCond;
12112        if (rootSubject != null
12113                && (rootSubject.getExpressionType() == EExpressionType.subquery_t
12114                        || rootSubject.getSubQuery() != null
12115                        || rootSubject.getExpressionType() == EExpressionType.exists_t)
12116                && !extractedRoots.contains(rootSubject)) {
12117            throw new SemanticIRBuildException(
12118                    Diagnostic.error(ctx.genericSubqueryNotSupported,
12119                    "subquery in " + ctx.clauseLabel + " predicate is not supported yet "
12120                            + "(slice 26 accepts only uncorrelated EXISTS / "
12121                            + "IN-SELECT / scalar-comparison / ANY-ALL-SOME with "
12122                            + "single column-ref or constant-only inner projection "
12123                            + "and a single column ref on the non-subquery side)", null));
12124        }
12125        final boolean[] found = {false};
12126        onCond.acceptChildren(new TParseTreeVisitor() {
12127            int skipDepth = 0;
12128
12129            @Override
12130            public void preVisit(TExpression e) {
12131                if (found[0]) return;
12132                if (extractedRoots.contains(e)) {
12133                    skipDepth++;
12134                    return;
12135                }
12136                if (skipDepth > 0) return;
12137                if (e.getExpressionType() == EExpressionType.subquery_t
12138                        || e.getSubQuery() != null
12139                        || e.getExpressionType() == EExpressionType.exists_t) {
12140                    found[0] = true;
12141                }
12142            }
12143
12144            @Override
12145            public void postVisit(TExpression e) {
12146                if (extractedRoots.contains(e) && skipDepth > 0) {
12147                    skipDepth--;
12148                }
12149            }
12150        });
12151        if (found[0]) {
12152            throw new SemanticIRBuildException(
12153                    Diagnostic.error(ctx.genericSubqueryNotSupported,
12154                    "subquery in " + ctx.clauseLabel + " predicate is not supported yet "
12155                            + "(slice 26 accepts only uncorrelated EXISTS / "
12156                            + "IN-SELECT / scalar-comparison / ANY-ALL-SOME with "
12157                            + "single column-ref or constant-only inner projection "
12158                            + "and a single column ref on the non-subquery side)", null));
12159        }
12160    }
12161
12162    /**
12163     * Slice 26: walk {@code onCond} (root + every descendant outside
12164     * {@code extractedRoots}) and return the first
12165     * {@code simple_comparison_t} expression whose BOTH operands are
12166     * {@code subquery_t}. Returns null if none.
12167     */
12168    private static TExpression findComparisonWithBothSubqueries(TExpression onCond,
12169                                                                final Set<TExpression> extractedRoots) {
12170        if (onCond == null) return null;
12171        if (isComparisonWithBothSubqueries(onCond)) return onCond;
12172        final TExpression[] found = {null};
12173        onCond.acceptChildren(new TParseTreeVisitor() {
12174            int skipDepth = 0;
12175
12176            @Override
12177            public void preVisit(TExpression e) {
12178                if (found[0] != null) return;
12179                if (extractedRoots.contains(e)) {
12180                    skipDepth++;
12181                    return;
12182                }
12183                if (skipDepth > 0) return;
12184                if (isComparisonWithBothSubqueries(e)) {
12185                    found[0] = e;
12186                }
12187            }
12188
12189            @Override
12190            public void postVisit(TExpression e) {
12191                if (extractedRoots.contains(e) && skipDepth > 0) {
12192                    skipDepth--;
12193                }
12194            }
12195        });
12196        return found[0];
12197    }
12198
12199    /**
12200     * Slice 23: variant of {@link #rejectWindowFunctionInScope} that skips
12201     * subtrees in {@code skipRoots}. The outer-SELECT JOIN-ON path passes the
12202     * extracted EXISTS roots so a window function inside an extracted body
12203     * is NOT incorrectly rejected as a window in the outer JOIN-ON. (The
12204     * inner statement's own buildSelectStatement does its own
12205     * rejectWindowFunctionInScope sweeps on WHERE / GROUP BY / HAVING /
12206     * ORDER BY, so legitimate inner-window violations still surface.)
12207     */
12208    private static void rejectWindowFunctionInScopeSkipping(
12209            gudusoft.gsqlparser.nodes.TParseTreeNode root,
12210            String clauseLabel,
12211            final Set<TExpression> skipRoots) {
12212        if (root == null) return;
12213        // Root fast path: if the root itself IS a skipped subtree, nothing to
12214        // check. (acceptChildren wouldn't see the root anyway.)
12215        if (root instanceof TExpression && skipRoots.contains(root)) {
12216            return;
12217        }
12218        final boolean[] found = {false};
12219        root.acceptChildren(new TParseTreeVisitor() {
12220            int skipDepth = 0;
12221
12222            @Override
12223            public void preVisit(TExpression e) {
12224                if (skipRoots.contains(e)) {
12225                    skipDepth++;
12226                }
12227            }
12228
12229            @Override
12230            public void postVisit(TExpression e) {
12231                if (skipRoots.contains(e) && skipDepth > 0) {
12232                    skipDepth--;
12233                }
12234            }
12235
12236            @Override
12237            public void preVisit(TFunctionCall fn) {
12238                if (found[0] || skipDepth > 0) return;
12239                if (fn.getWindowDef() != null) found[0] = true;
12240            }
12241        });
12242        if (found[0]) {
12243            throw new SemanticIRBuildException(
12244                    Diagnostic.error(DiagnosticCode.CLAUSE_WINDOW_FUNCTION_LEAK,
12245                    clauseLabel + " contains a window function (OVER (...)); "
12246                            + "window functions are not allowed in " + clauseLabel
12247                            + " per standard SQL", root));
12248        }
12249    }
12250
12251    /**
12252     * Slice 23: variant of {@link #collectColumnRefs} that skips subtrees in
12253     * {@code skipRoots}. Used by the outer-SELECT JOIN-ON path so the
12254     * extracted EXISTS bodies' inner refs do not leak into outer
12255     * {@code joinColumnRefs}.
12256     */
    private static List<ColumnRef> collectColumnRefsSkipping(
            gudusoft.gsqlparser.nodes.TParseTreeNode root,
            final NameBindingProvider provider,
            final Set<TExpression> skipRoots) {
        // Slice 31 refactor: this method is now a thin delegate to
        // collectColumnRefsSkippingExtended. It forwards root, provider and
        // skipRoots unchanged and supplies an EMPTY TWithinGroup skip set, so
        // only the TExpression subtrees in skipRoots are excluded.
        // The intent is that behavior stays exactly as before for the
        // existing slice-28 caller (outer JOIN-ON path; the original note
        // pointed at "line ~3021", which may have drifted) and for the
        // slice-31 caller that goes through
        // collectColumnRefsExcludingFilterAndWithinGroupClauses.
        return collectColumnRefsSkippingExtended(root, provider,
                skipRoots, Collections.<TWithinGroup>emptySet());
    }
12269
12270    /**
12271     * Slice 28: collect every non-null {@link TFunctionCall#getFilterClause()}
12272     * subtree reachable from {@code root}. The returned set is identity-keyed
12273     * (uses {@link IdentityHashMap}) — required because the parser may yield
12274     * two structurally-equal FILTER WHERE expressions with different
12275     * identities; a value-keyed set would coalesce them and the
12276     * downstream {@link #collectColumnRefsSkipping} call would skip only one.
12277     *
12278     * <p>Contract: {@code root} is one of {@link TResultColumn} or
12279     * {@link TExpression}. Both call sites
12280     * ({@link #collectColumnRefsExcludingFilterClauses} for projection
12281     * source collection; the slice-28 correlation walk inside
12282     * {@link #extractOnePredicateSubqueryBody}) pass values of those
12283     * two types. Other {@code TParseTreeNode} subclasses are accepted
12284     * defensively (the visitor scan still works) but the top-level
12285     * direct-check fast paths only cover {@code TResultColumn} and
12286     * {@code TExpression}; this is intentional — adding a
12287     * {@code TFunctionCall} fast path would be reachable only if a
12288     * future call site were added with a {@code TFunctionCall} root.
12289     *
12290     * <p>Visitor-driven; descends into all expression subtrees the
12291     * standard {@code TFunctionCall.acceptChildren} path visits — function
12292     * args, {@code OVER} (analyticFunction / windowDef), FILTER, CASE arms,
12293     * parenthesised sub-expressions, AND the
12294     * {@code windowDef.withinGroup.orderBy} path on Oracle / MSSQL /
12295     * SparkSQL parsers. Note: the PostgreSQL parser stores WITHIN GROUP
12296     * on the direct {@code fn.withinGroup} field, which
12297     * {@code TFunctionCall.acceptChildren} does NOT visit, so PG WITHIN
12298     * GROUP ORDER BY refs are invisible to this collector. Slice 29
12299     * relies on that asymmetry to admit PG WITHIN GROUP aggregates in
12300     * predicate-subquery inner projections without a source-skip.
12301     * Used by:
12302     * <ul>
12303     *   <li>{@link #collectColumnRefsExcludingFilterClauses} (Pass 1) — the
12304     *       global source-skip in {@link #buildOutputColumns}.</li>
12305     *   <li>{@link #extractOnePredicateSubqueryBody}'s slice-28 correlation
12306     *       walk — projection-only correlation check for FILTER predicate
12307     *       refs.</li>
12308     * </ul>
12309     */
12310    private static Set<TExpression> collectFilterClauses(
12311            gudusoft.gsqlparser.nodes.TParseTreeNode root) {
12312        final Set<TExpression> out =
12313                Collections.newSetFromMap(new IdentityHashMap<TExpression, Boolean>());
12314        if (root == null) return out;
12315        // Visitor descends into all expression subtrees; preVisit on
12316        // TFunctionCall records the filter clause if present. The
12317        // visitor's preVisit(TFunctionCall) does NOT fire for a top-level
12318        // function_t expression's root TFunctionCall (matches the slice-13 /
12319        // slice-27 visitor descent behavior); the defensive direct checks
12320        // below cover the top-level case for the two supported root types.
12321        root.acceptChildren(new TParseTreeVisitor() {
12322            @Override
12323            public void preVisit(TFunctionCall fn) {
12324                TExpression f = fn.getFilterClause();
12325                if (f != null) out.add(f);
12326            }
12327        });
12328        if (root instanceof TExpression) {
12329            TExpression e = (TExpression) root;
12330            if (e.getExpressionType() == EExpressionType.function_t) {
12331                TFunctionCall fn = e.getFunctionCall();
12332                if (fn != null && fn.getFilterClause() != null) {
12333                    out.add(fn.getFilterClause());
12334                }
12335            }
12336        } else if (root instanceof TResultColumn) {
12337            TResultColumn rc = (TResultColumn) root;
12338            TExpression e = rc.getExpr();
12339            if (e != null && e.getExpressionType() == EExpressionType.function_t) {
12340                TFunctionCall fn = e.getFunctionCall();
12341                if (fn != null && fn.getFilterClause() != null) {
12342                    out.add(fn.getFilterClause());
12343                }
12344            }
12345        }
12346        return out;
12347    }
12348
12349    /**
12350     * Slice 30: collect every qualifier alias (the {@code x} of an
12351     * {@code x.region} TObjectName column reference) reachable from
12352     * {@code root}, without going through the resolver. Used by the
12353     * slice-30 WITHIN GROUP ORDER BY correlation walk in
12354     * {@link #extractOnePredicateSubqueryBody}.
12355     *
12356     * <p>Why bypass the resolver: PostgreSQL's parser stores WITHIN GROUP
12357     * on the direct {@code fn.withinGroup} field. {@code TFunctionCall.acceptChildren}
12358     * does NOT descend into that field, AND Resolver2 follows the same
12359     * traversal, so its {@code ResolutionResult} is null on TObjectName
12360     * nodes inside {@code fn.withinGroup.orderBy}. Calling
12361     * {@link #collectColumnRefs} on the WG ORDER BY would route through
12362     * {@code provider.bindColumn} → {@code NOT_FOUND}, and the
12363     * {@code non-exact column bindings} check would throw on legitimate
12364     * non-correlated refs. The qualifier-only collector here reads the
12365     * alias straight off the TObjectName via {@link TObjectName#getTableString()},
12366     * matching the slice-23 correlation invariant: qualified refs only.
12367     * Unqualified refs are out of scope (same schema-less limitation as
12368     * the rest of slice-23).
12369     *
12370     * <p>Returned in iteration order (so error messages identify the
12371     * first offender) using a list rather than a set.
12372     */
12373    private static List<String> collectQualifierAliases(
12374            gudusoft.gsqlparser.nodes.TParseTreeNode root) {
12375        final List<String> out = new ArrayList<>();
12376        if (root == null) return out;
12377        root.acceptChildren(new TParseTreeVisitor() {
12378            @Override
12379            public void preVisit(TObjectName node) {
12380                if (node.getDbObjectType() != EDbObjectType.column) return;
12381                String t = node.getTableString();
12382                if (t != null && !t.isEmpty()) out.add(t);
12383            }
12384        });
12385        return out;
12386    }
12387
12388    /**
12389     * Slice 30 / Slice 31: collect every WITHIN-GROUP {@code ORDER BY}
12390     * clause anywhere in the subtree, identity-keyed so two
12391     * structurally-equal order-by clauses don't collapse.
12392     *
12393     * <p>Two attachment styles are covered:
12394     * <ul>
12395     *   <li><b>Slice 30 — direct attachment</b>: PostgreSQL /
12396     *       Snowflake / DB2 / SparkSQL parsers store WITHIN GROUP on
12397     *       {@code fn.getWithinGroup()}. The default
12398     *       {@code TFunctionCall.acceptChildren} does NOT descend into
12399     *       that field, so the slice-23 {@link #collectAllInnerRefs}
12400     *       walk is blind to outer-alias references inside the ORDER BY.
12401     *       The slice-30 correlation walk needs explicit access, hence
12402     *       this helper.</li>
12403     *   <li><b>Slice 31 — windowDef attachment</b>: Oracle / MSSQL
12404     *       parsers store WITHIN GROUP on
12405     *       {@code fn.getWindowDef().getWithinGroup()} when the
12406     *       windowDef is {@link #isWithinGroupOnlyWindowDef
12407     *       WITHIN-GROUP-only}. The default {@code acceptChildren}
12408     *       DOES descend through {@code windowDef.acceptChildren}
12409     *       which calls {@code withinGroup.acceptChildren} which calls
12410     *       {@code orderBy.acceptChildren}, so column refs would
12411     *       already appear in {@link #collectAllInnerRefs}-driven
12412     *       walks. <b>However</b>, the slice-31 source-skip in
12413     *       {@link #collectColumnRefsExcludingFilterAndWithinGroupClauses}
12414     *       removes those refs from {@link OutputColumn#getSources()};
12415     *       the slice-23 correlation walk only sees {@code OutputColumn.sources}
12416     *       for the projection bucket, so a correlated
12417     *       {@code LISTAGG(x.id) WITHIN GROUP (ORDER BY e.region)} on
12418     *       Oracle would slip past the slice-23 loop after the source-
12419     *       skip. This dual-attachment helper closes that asymmetry.
12420     *       (Inner WHERE / JOIN / HAVING / ORDER BY clauses are
12421     *       independently rejected by the slice-13 strict
12422     *       {@code rejectWindowFunctionInScope} family — Oracle
12423     *       LISTAGG WG inside a clause never reaches this helper.)</li>
12424     * </ul>
12425     *
12426     * <p>The visitor's {@code preVisit(TFunctionCall)} does NOT fire for
12427     * a top-level {@code function_t} expression's root TFunctionCall;
12428     * defensive direct checks below cover the top-level case for both
12429     * {@link TExpression} and {@link TResultColumn} roots, mirroring
12430     * {@link #collectFilterClauses}.
12431     *
12432     * <p>Used by {@link #extractOnePredicateSubqueryBody}'s
12433     * projection-only correlation walk (line ~3690) for both
12434     * direct-attachment (slice 30) and windowDef-attachment (slice 31)
12435     * outer-alias references inside WITHIN GROUP ORDER BY.
12436     */
12437    private static Set<TOrderBy> collectDirectWithinGroupOrderBys(
12438            gudusoft.gsqlparser.nodes.TParseTreeNode root) {
12439        final Set<TOrderBy> out =
12440                Collections.newSetFromMap(new IdentityHashMap<TOrderBy, Boolean>());
12441        if (root == null) return out;
12442        root.acceptChildren(new TParseTreeVisitor() {
12443            @Override
12444            public void preVisit(TFunctionCall fn) {
12445                TOrderBy direct = fn.getWithinGroup() == null
12446                        ? null : fn.getWithinGroup().getOrderBy();
12447                if (direct != null) out.add(direct);
12448                TWindowDef wd = fn.getWindowDef();
12449                if (isWithinGroupOnlyWindowDef(wd)) {
12450                    TOrderBy wdOb = wd.getWithinGroup().getOrderBy();
12451                    if (wdOb != null) out.add(wdOb);
12452                }
12453            }
12454        });
12455        if (root instanceof TExpression) {
12456            TExpression e = (TExpression) root;
12457            if (e.getExpressionType() == EExpressionType.function_t) {
12458                addWithinGroupOrderByIfPresent(e.getFunctionCall(), out);
12459            }
12460        } else if (root instanceof TResultColumn) {
12461            TResultColumn rc = (TResultColumn) root;
12462            TExpression e = rc.getExpr();
12463            if (e != null && e.getExpressionType() == EExpressionType.function_t) {
12464                addWithinGroupOrderByIfPresent(e.getFunctionCall(), out);
12465            }
12466        }
12467        return out;
12468    }
12469
12470    /**
12471     * Slice 30 / 31: helper for top-level direct check inside
12472     * {@link #collectDirectWithinGroupOrderBys}. Adds the WITHIN GROUP
12473     * ORDER BY clause to {@code out} for whichever attachment style
12474     * the function carries.
12475     */
12476    private static void addWithinGroupOrderByIfPresent(TFunctionCall fn,
12477                                                       Set<TOrderBy> out) {
12478        if (fn == null) return;
12479        if (fn.getWithinGroup() != null && fn.getWithinGroup().getOrderBy() != null) {
12480            out.add(fn.getWithinGroup().getOrderBy());
12481        }
12482        TWindowDef wd = fn.getWindowDef();
12483        if (isWithinGroupOnlyWindowDef(wd) && wd.getWithinGroup().getOrderBy() != null) {
12484            out.add(wd.getWithinGroup().getOrderBy());
12485        }
12486    }
12487
12488    /**
12489     * Slice 28: variant of {@link #collectColumnRefs} that excludes column
12490     * refs inside {@code FILTER (WHERE ...)} clauses on any function call
12491     * in the subtree. Used by {@link #buildOutputColumns} for ALL output
12492     * source collection so the IR's per-projection {@code OutputColumn.sources}
12493     * matches dlineage's lineage-relationship view (which omits FILTER
12494     * predicate column refs entirely; see slice-28 probes Q1–Q4).
12495     *
12496     * <p>For projections that contain no FILTER aggregates (the common case),
12497     * Pass 1 yields zero skip-roots and Pass 2 reduces to the plain
12498     * {@link #collectColumnRefs}. The asymmetry between projection sources
12499     * (FILTER-skipped) and clause refs ({@code filterColumnRefs},
12500     * {@code joinColumnRefs}, {@code groupByColumnRefs},
12501     * {@code havingColumnRefs}, {@code orderByColumnRefs} — NOT
12502     * FILTER-skipped) is intentional: it keeps the existing
12503     * {@link #collectAllInnerRefs}-driven correlation check at line ~3603
12504     * sufficient for FILTER refs landing in non-projection clauses, while
12505     * the slice-28 correlation walk in {@link #extractOnePredicateSubqueryBody}
12506     * covers projection-FILTER refs.
12507     */
12508    private static List<ColumnRef> collectColumnRefsExcludingFilterClauses(
12509            gudusoft.gsqlparser.nodes.TParseTreeNode root,
12510            NameBindingProvider provider) {
12511        Set<TExpression> filterClauses = collectFilterClauses(root);
12512        if (filterClauses.isEmpty()) {
12513            return collectColumnRefs(root, provider);
12514        }
12515        return collectColumnRefsSkipping(root, provider, filterClauses);
12516    }
12517
12518    /**
12519     * Slice 31: identity-keyed set of every {@link TWithinGroup} reachable
12520     * from {@code root} via {@code fn.getWindowDef().getWithinGroup()} —
12521     * the Oracle / MSSQL attachment style for plain {@code WITHIN GROUP
12522     * (ORDER BY ...)} aggregates. Used as additional skip-roots in
12523     * {@link #collectColumnRefsExcludingFilterAndWithinGroupClauses} so
12524     * the column refs inside the WITHIN GROUP ORDER BY do NOT enter
12525     * {@link OutputColumn#getSources()} on Oracle / MSSQL — matching
12526     * dlineage's omission of those refs from {@code fdr clause="on"}
12527     * sources (probe Q1 / Q3 / Q4 / Q5 in {@code /tmp/probe31}).
12528     *
12529     * <p>Discriminator: {@link #isWithinGroupOnlyWindowDef}. OVER-bearing
12530     * windowDefs (real window functions) are NOT collected here — the
12531     * slice-13 invariant rejecters keep them rejected before this
12532     * collector ever fires for projection sources, so they cannot
12533     * reach the source-skip in practice. Defensive: even if they did,
12534     * the discriminator excludes them so PARTITION BY / OVER ORDER BY
12535     * column refs (slice-13 / slice-19 alias-bound contracts) keep
12536     * their existing semantics.
12537     *
12538     * <p>The PostgreSQL direct {@code fn.getWithinGroup()} attachment
12539     * is NOT collected here because PG's
12540     * {@code TFunctionCall.acceptChildren} does not descend into the
12541     * direct field — slice 29 relied on that asymmetry to admit PG
12542     * WITHIN GROUP aggregates without any source-skip; slice 31
12543     * preserves that asymmetry on PG.
12544     */
12545    private static Set<TWithinGroup> collectWithinGroupClausesFromWindowDef(
12546            gudusoft.gsqlparser.nodes.TParseTreeNode root) {
12547        final Set<TWithinGroup> out =
12548                Collections.newSetFromMap(new IdentityHashMap<TWithinGroup, Boolean>());
12549        if (root == null) return out;
12550        root.acceptChildren(new TParseTreeVisitor() {
12551            @Override
12552            public void preVisit(TFunctionCall fn) {
12553                TWindowDef wd = fn.getWindowDef();
12554                if (isWithinGroupOnlyWindowDef(wd)) {
12555                    out.add(wd.getWithinGroup());
12556                }
12557            }
12558        });
12559        if (root instanceof TExpression) {
12560            TExpression e = (TExpression) root;
12561            if (e.getExpressionType() == EExpressionType.function_t) {
12562                addWithinGroupFromWindowDefIfPresent(e.getFunctionCall(), out);
12563            }
12564        } else if (root instanceof TResultColumn) {
12565            TResultColumn rc = (TResultColumn) root;
12566            TExpression e = rc.getExpr();
12567            if (e != null && e.getExpressionType() == EExpressionType.function_t) {
12568                addWithinGroupFromWindowDefIfPresent(e.getFunctionCall(), out);
12569            }
12570        }
12571        return out;
12572    }
12573
12574    /**
12575     * Slice 31: helper for top-level direct check inside
12576     * {@link #collectWithinGroupClausesFromWindowDef}. Mirrors the
12577     * slice-30 {@link #addWithinGroupOrderByIfPresent} helper but adds
12578     * the {@link TWithinGroup} node itself to {@code out} (the entire
12579     * WITHIN GROUP subtree is the skip-root, not just its ORDER BY).
12580     */
12581    private static void addWithinGroupFromWindowDefIfPresent(TFunctionCall fn,
12582                                                             Set<TWithinGroup> out) {
12583        if (fn == null) return;
12584        TWindowDef wd = fn.getWindowDef();
12585        if (isWithinGroupOnlyWindowDef(wd)) {
12586            out.add(wd.getWithinGroup());
12587        }
12588    }
12589
12590    /**
12591     * Slice 31: extends slice-28's filter-skipping projection-source
12592     * collector with an additional skip for Oracle / MSSQL
12593     * {@code fn.windowDef.withinGroup} subtrees. Reduces to slice-28
12594     * behavior on PostgreSQL (where windowDef is null) and to plain
12595     * {@link #collectColumnRefs} when neither FILTER nor WITHIN GROUP
12596     * is present.
12597     *
12598     * <p>Used by {@link #buildOutputColumns} for ALL projection source
12599     * collection (predicate-body short-circuit at line ~4952 and
12600     * normal projection loop at line ~5069) so the IR's
12601     * {@link OutputColumn#getSources()} matches dlineage's
12602     * lineage-relationship view across PG / Oracle / MSSQL. The
12603     * non-projection clause-bucket collectors
12604     * ({@code filterColumnRefs}, {@code joinColumnRefs},
12605     * {@code groupByColumnRefs}, {@code havingColumnRefs},
12606     * {@code orderByColumnRefs}) intentionally keep using plain
12607     * {@link #collectColumnRefs} — the slice-13 strict
12608     * {@code rejectWindowFunctionInScope} family rejects any
12609     * {@code wd != null} function in those clauses BEFORE collection
12610     * descends into them, so WITHIN GROUP refs cannot leak into
12611     * clause buckets in practice.
12612     */
12613    private static List<ColumnRef> collectColumnRefsExcludingFilterAndWithinGroupClauses(
12614            gudusoft.gsqlparser.nodes.TParseTreeNode root,
12615            NameBindingProvider provider) {
12616        Set<TExpression> filterClauses = collectFilterClauses(root);
12617        Set<TWithinGroup> withinGroupClauses = collectWithinGroupClausesFromWindowDef(root);
12618        if (filterClauses.isEmpty() && withinGroupClauses.isEmpty()) {
12619            return collectColumnRefs(root, provider);
12620        }
12621        return collectColumnRefsSkippingExtended(root, provider,
12622                filterClauses, withinGroupClauses);
12623    }
12624
12625    /**
12626     * Slice 31: variant of {@link #collectColumnRefsSkipping} that
12627     * additionally skips column refs inside {@link TWithinGroup}
12628     * subtrees in {@code wgSkipRoots} (Oracle / MSSQL
12629     * {@code fn.windowDef.withinGroup} attachment). The existing
12630     * {@code exprSkipRoots} carries the slice-28 FILTER subtrees.
12631     * Returns column refs in iteration order.
12632     *
12633     * <p>Refactor note: {@link #collectColumnRefsSkipping} now delegates
12634     * to this method with an empty {@code wgSkipRoots} set so its
12635     * behavior is preserved exactly for legacy callers (the outer
12636     * JOIN-ON path at line ~3021).
12637     */
12638    private static List<ColumnRef> collectColumnRefsSkippingExtended(
12639            gudusoft.gsqlparser.nodes.TParseTreeNode root,
12640            final NameBindingProvider provider,
12641            final Set<TExpression> exprSkipRoots,
12642            final Set<TWithinGroup> wgSkipRoots) {
12643        final LinkedHashSet<ColumnRef> refs = new LinkedHashSet<>();
12644        final List<String> rejects = new ArrayList<>();
12645        // Root fast path: if root IS a skipped TExpression subtree, return empty.
12646        if (root instanceof TExpression && exprSkipRoots.contains(root)) {
12647            return new ArrayList<>(refs);
12648        }
12649        root.acceptChildren(new TParseTreeVisitor() {
12650            int skipDepth = 0;
12651            int nestedSelectDepth = 0;
12652
12653            @Override
12654            public void preVisit(TExpression e) {
12655                if (exprSkipRoots.contains(e)) skipDepth++;
12656            }
12657
12658            @Override
12659            public void postVisit(TExpression e) {
12660                if (exprSkipRoots.contains(e) && skipDepth > 0) skipDepth--;
12661            }
12662
12663            @Override
12664            public void preVisit(TWithinGroup wg) {
12665                if (wgSkipRoots.contains(wg)) skipDepth++;
12666            }
12667
12668            @Override
12669            public void postVisit(TWithinGroup wg) {
12670                if (wgSkipRoots.contains(wg) && skipDepth > 0) skipDepth--;
12671            }
12672
12673            @Override
12674            public void preVisit(TSelectSqlStatement nested) {
12675                nestedSelectDepth++;
12676            }
12677
12678            @Override
12679            public void postVisit(TSelectSqlStatement nested) {
12680                nestedSelectDepth--;
12681            }
12682
12683            @Override
12684            public void preVisit(TObjectName node) {
12685                if (skipDepth > 0) return;
12686                if (nestedSelectDepth > 0) return;
12687                appendMergedOrBoundColumnRef(node, provider, refs, rejects);
12688            }
12689        });
12690        if (!rejects.isEmpty()) {
12691            throw new SemanticIRBuildException(Diagnostic.error(DiagnosticCode.COLUMN_BINDING_NON_EXACT, "non-exact column bindings: " + rejects, null));
12692        }
12693        return new ArrayList<>(refs);
12694    }
12695
12696    /**
12697     * Reject join shapes that would silently drop predicate semantics:
12698     * semi/anti, vendor-specific kinds; predicate-bearing joins with
12699     * no ON and no USING clause; CROSS / NATURAL JOIN with ON or
12700     * USING. Slice 63 admits {@code CROSS JOIN} via
12701     * {@link #ALLOWED_ON_LESS_JOIN_TYPES}. Slice 64 admits
12702     * {@code JOIN ... USING (...)} on predicate join types; the
12703     * per-key {@code joinColumnRefs} emission is handled in
12704     * {@link #buildRelations}. Slice 66 admits {@code NATURAL JOIN}
12705     * via {@link #NATURAL_JOIN_TYPES} when catalog metadata is
12706     * available on both sides; the catalog-required reject fires
12707     * inside {@link #buildRelations}, not here.
12708     */
12709    private static void rejectUnsupportedJoinShape(TJoinItem item) {
12710        EJoinType jt = item.getJoinType();
12711        boolean isPredicate = jt != null && ALLOWED_PREDICATE_JOIN_TYPES.contains(jt);
12712        boolean isOnLess = jt != null && ALLOWED_ON_LESS_JOIN_TYPES.contains(jt);
12713        boolean isNatural = isNaturalJoinType(jt);
12714        if (!isPredicate && !isOnLess && !isNatural) {
12715            throw new SemanticIRBuildException(
12716                    Diagnostic.error(DiagnosticCode.UNSUPPORTED_JOIN_TYPE,
12717                    "join type " + jt + " is not supported yet; "
12718                            + "only INNER/LEFT/RIGHT/FULL [OUTER] JOIN ... ON, "
12719                            + "JOIN ... USING (...), CROSS JOIN, and "
12720                            + "NATURAL [INNER/LEFT/RIGHT/FULL [OUTER]] JOIN are accepted", item));
12721        }
12722        boolean hasUsing = item.getUsingColumns() != null
12723                && item.getUsingColumns().size() > 0;
12724        boolean hasOn = item.getOnCondition() != null;
12725        if (isNatural) {
12726            if (hasOn) {
12727                throw new SemanticIRBuildException(
12728                        Diagnostic.error(DiagnosticCode.NATURAL_WITH_ON,
12729                        "NATURAL JOIN must not carry an ON condition; rewrite "
12730                                + "as JOIN ... ON, or drop the NATURAL keyword", item));
12731            }
12732            if (hasUsing) {
12733                throw new SemanticIRBuildException(
12734                        Diagnostic.error(DiagnosticCode.NATURAL_WITH_USING,
12735                        "NATURAL JOIN must not carry a USING clause; choose "
12736                                + "either NATURAL or USING, not both", item));
12737            }
12738            return;
12739        }
12740        if (isOnLess) {
12741            if (hasOn) {
12742                throw new SemanticIRBuildException(
12743                        Diagnostic.error(DiagnosticCode.CROSS_WITH_ON,
12744                        "CROSS JOIN must not carry an ON condition; rewrite "
12745                                + "as INNER JOIN ... ON, or drop the ON clause", item));
12746            }
12747            if (hasUsing) {
12748                throw new SemanticIRBuildException(
12749                        Diagnostic.error(DiagnosticCode.CROSS_WITH_USING,
12750                        "CROSS JOIN must not carry a USING clause; rewrite "
12751                                + "as INNER JOIN ... USING (...) or drop USING", item));
12752            }
12753            return;
12754        }
12755        // Predicate-bearing path.
12756        if (hasUsing && hasOn) {
12757            throw new SemanticIRBuildException(
12758                    Diagnostic.error(DiagnosticCode.JOIN_WITH_BOTH_ON_AND_USING,
12759                    "JOIN cannot carry both ON and USING; choose one", item));
12760        }
12761        if (!hasUsing && !hasOn) {
12762            throw new SemanticIRBuildException(
12763                    Diagnostic.error(DiagnosticCode.JOIN_MISSING_ON_OR_USING,
12764                    "JOIN with no ON or USING condition is not supported yet "
12765                            + "(implicit joins must be explicit and supported)", item));
12766        }
12767    }
12768
12769    /**
12770     * Slice 64 — populate {@code joinColumnRefs} for a USING-shaped
12771     * join item. Emits refs in <b>left-then-right</b> order per key.
12772     *
12773     * <p>Column-source resolution looks at two sources, in order:
12774     * <ol>
12775     *   <li>The catalog (via
12776     *       {@link NameBindingProvider#getRelationColumnNames(TTable)})
12777     *       for base tables;</li>
12778     *   <li>The slice-60 in-scope-relation-columns map (via
12779     *       {@link NameBindingProvider#getInScopeRelationColumns()})
12780     *       for CTE and FROM-subquery relations, keyed by effective
12781     *       alias.</li>
12782     * </ol>
12783     *
12784     * <p>Left side uses these two sources to narrow to the prior
12785     * relations that actually declare the USING key, walking
12786     * {@code topJoin.getTable()} then
12787     * {@code items[0..itemIndex-1].getTable()} in FROM order.
12788     *
12789     * <p>Right side is always {@code item.getTable()}. When either
12790     * source declares the right relation's columns and the USING key
12791     * is absent there, the build is failed-fast with a
12792     * non-exact-binding-style reject — matching what the resolver
12793     * does for plain {@code SELECT k} where {@code k} doesn't exist.
12794     *
12795     * <p>When neither source has any column info for the prior
12796     * relations (no catalog and no in-scope map), fall back to
12797     * emitting one ref for the immediately-prior relation so the
12798     * slice-64 admission still works without a catalog. Same
12799     * fallback applies to the right side (emit unconditionally).
12800     *
12801     * <p>This matches resolver2's all-chain-tables linkage
12802     * ({@code ScopeBuilder.preVisit(TJoinItem)}) for the cases where
12803     * catalog/in-scope info is missing, without adopting its
12804     * over-approximation when info IS available.
12805     */
12806    private static void populateUsingJoinRefs(TJoin topJoin,
12807                                              TJoinItemList items,
12808                                              int itemIndex,
12809                                              TTable rightTable,
12810                                              TObjectNameList usingCols,
12811                                              NameBindingProvider provider,
12812                                              List<ColumnRef> joinRefsOut) {
12813        // Slice 66: collect the SQL-written USING key spellings and
12814        // delegate to the shared {@link #emitMergedJoinRefs} helper
12815        // which serves both USING (this path) and NATURAL.
12816        List<String> keyNames = new ArrayList<>(usingCols.size());
12817        for (int k = 0; k < usingCols.size(); k++) {
12818            TObjectName usingKey = usingCols.getObjectName(k);
12819            if (usingKey == null) continue;
12820            String keyName = usingKey.getColumnNameOnly();
12821            if (keyName == null || keyName.isEmpty()) continue;
12822            keyNames.add(keyName);
12823        }
12824        emitMergedJoinRefs(JoinKind.USING, keyNames, topJoin, items,
12825                itemIndex, rightTable, provider, joinRefsOut);
12826    }
12827
12828    /**
12829     * Slice 66 — discriminator for {@link #emitMergedJoinRefs}. USING
12830     * comes from a syntactic clause and enforces left-and-right
12831     * "key-must-exist" rejects; NATURAL comes from catalog inference
12832     * and never has a missing key by construction.
12833     */
12834    private enum JoinKind { USING, NATURAL }
12835
12836    /**
12837     * Slice 66 — shared emit-refs helper used by USING and NATURAL.
12838     * Emits per-key {@code joinColumnRefs} in <b>left-then-right</b>
12839     * order, walking every prior FROM relation for the left side. The
12840     * {@code kind} discriminator controls:
12841     *
12842     * <ul>
12843     *   <li><b>CTE-explicit-column-list deferral</b>: applies to BOTH
12844     *       (the diagnostic wording mentions JOIN kind);</li>
12845     *   <li><b>Right-side "missing key" reject</b>: USING-only —
12846     *       NATURAL keys come from catalog intersection so the key
12847     *       must be present on the right by construction;</li>
12848     *   <li><b>Left-side "missing key" reject</b>: USING-only — same
12849     *       rationale.</li>
12850     * </ul>
12851     *
12852     * <p>Spelling: the caller supplies the emitted spelling (USING
12853     * passes the SQL-written spelling; NATURAL passes the catalog-
12854     * declared spelling of the first contributor — see
12855     * {@link #naturalSharedKeys}).
12856     */
12857    private static void emitMergedJoinRefs(JoinKind kind,
12858                                           List<String> keyNames,
12859                                           TJoin topJoin,
12860                                           TJoinItemList items,
12861                                           int itemIndex,
12862                                           TTable rightTable,
12863                                           NameBindingProvider provider,
12864                                           List<ColumnRef> joinRefsOut) {
12865        String rightAlias = effectiveAliasOf(rightTable);
12866        List<TTable> priorRelations = new ArrayList<>();
12867        if (topJoin.getTable() != null) {
12868            priorRelations.add(topJoin.getTable());
12869        }
12870        for (int j = 0; j < itemIndex; j++) {
12871            TJoinItem prevItem = items.getJoinItem(j);
12872            if (prevItem != null && prevItem.getTable() != null) {
12873                priorRelations.add(prevItem.getTable());
12874            }
12875        }
12876        // Slice 60 / 64 / 66 originally rejected USING / NATURAL joins
12877        // against a CTE with an explicit column list because the CTE
12878        // body's StatementGraph published inner-projection names rather
12879        // than the renamed list. Slice 103 lifts that rejection by
12880        // wiring the slice-102 rename helper into the SELECT-side CTE
12881        // walker; the published column list now matches the explicit
12882        // list, so lookupRelationColumnNames returns the renamed names
12883        // from the in-scope map and the merged-key emit below works.
12884        // `MERGED_JOIN_AGAINST_CTE_WITH_EXPLICIT_COLUMN_LIST` stays
12885        // declared-but-unreached (slice 71/72/82/86/95/96/97/98/99/100/
12886        // 101/102 precedent).
12887        for (String keyName : keyNames) {
12888            if (keyName == null || keyName.isEmpty()) continue;
12889
12890            // Left side FIRST (matches ON-clause natural reading order).
12891            // For USING: priorRelations with metadata-unknown or
12892            // declared-key emit a ref; missing-key skips. If no ref
12893            // emitted and all priors had known info → USING-only
12894            // reject (NATURAL never reaches this because the catalog
12895            // intersection guarantees at least one contributor).
12896            boolean emittedAnyLeft = false;
12897            boolean allPriorsHadColumnInfo = true;
12898            for (TTable prior : priorRelations) {
12899                List<String> cols = lookupRelationColumnNames(prior, provider);
12900                if (cols == null) {
12901                    allPriorsHadColumnInfo = false;
12902                    joinRefsOut.add(new ColumnRef(
12903                            effectiveAliasOf(prior), keyName));
12904                    emittedAnyLeft = true;
12905                    continue;
12906                }
12907                for (String c : cols) {
12908                    if (c != null && c.equalsIgnoreCase(keyName)) {
12909                        joinRefsOut.add(new ColumnRef(
12910                                effectiveAliasOf(prior), keyName));
12911                        emittedAnyLeft = true;
12912                        break;
12913                    }
12914                }
12915            }
12916            if (kind == JoinKind.USING
12917                    && !emittedAnyLeft && allPriorsHadColumnInfo
12918                    && !priorRelations.isEmpty()) {
12919                throw new SemanticIRBuildException(
12920                        Diagnostic.error(DiagnosticCode.USING_KEY_NOT_DECLARED,
12921                        "USING key '" + keyName + "' is not declared on "
12922                                + "any left-side relation; check that the "
12923                                + "key exists on at least one of the "
12924                                + "joined-in relations", rightTable));
12925            }
12926
12927            // Right side. USING: must exist OR catalog unknown.
12928            // NATURAL: by construction the key is in right's catalog.
12929            // For the unknown-catalog case we still emit (over-approximate).
12930            List<String> rightCols = lookupRelationColumnNames(rightTable, provider);
12931            if (rightCols == null) {
12932                joinRefsOut.add(new ColumnRef(rightAlias, keyName));
12933            } else {
12934                boolean rightHasKey = false;
12935                for (String c : rightCols) {
12936                    if (c != null && c.equalsIgnoreCase(keyName)) {
12937                        rightHasKey = true;
12938                        break;
12939                    }
12940                }
12941                if (!rightHasKey) {
12942                    if (kind == JoinKind.USING) {
12943                        throw new SemanticIRBuildException(
12944                                Diagnostic.error(DiagnosticCode.USING_KEY_NOT_DECLARED,
12945                                "USING key '" + keyName + "' is not declared on "
12946                                        + "right-side relation '" + rightAlias
12947                                        + "'; USING requires the key to exist on "
12948                                        + "both sides", rightTable));
12949                    }
12950                    // NATURAL: silently skip — should be unreachable
12951                    // because keys come from the intersection.
12952                    continue;
12953                }
12954                joinRefsOut.add(new ColumnRef(rightAlias, keyName));
12955            }
12956        }
12957    }
12958
12959    /**
12960     * Slice 64 — true iff the given table reference is a CTE with an
12961     * explicit column list (e.g. {@code WITH x(a, b) AS ...}). Slice
12962     * 64 originally used this to defer USING joins against such CTEs;
12963     * slice 103 lifted that deferral by wiring the slice-102 rename
12964     * helper into the SELECT-side CTE walker. The helper is retained
12965     * for {@link #buildUsingScope}'s ambiguity check (defense in depth)
12966     * and may be reused by future call sites that need to discriminate
12967     * the shape.
12968     */
12969    private static boolean hasExplicitCteColumnList(TTable table) {
12970        if (table == null) return false;
12971        TCTE cte = table.getCTE();
12972        return cte != null && cte.getColumnList() != null
12973                && cte.getColumnList().size() > 0;
12974    }
12975
12976    /**
12977     * Slice 65 — read the renamed column names from a CTE's explicit
12978     * column list ({@code WITH x(a, b) AS ...}). Used by
12979     * {@link #buildUsingScope}'s ambiguity check as a defense-in-depth
12980     * complement to {@link #lookupRelationColumnNames}. Slice 65
12981     * originally needed this because the CTE body's StatementGraph
12982     * published inner-projection names; slice 103 lifted that gap by
12983     * applying the slice-102 rename helper on the SELECT side, so the
12984     * in-scope-map path now returns the renamed list too.
12985     */
12986    private static java.util.List<String> explicitCteColumnNames(TTable table) {
12987        if (table == null) return null;
12988        TCTE cte = table.getCTE();
12989        if (cte == null) return null;
12990        if (cte.getColumnList() == null || cte.getColumnList().size() == 0) {
12991            return null;
12992        }
12993        java.util.List<String> names = new java.util.ArrayList<>(cte.getColumnList().size());
12994        for (int i = 0; i < cte.getColumnList().size(); i++) {
12995            TObjectName col = cte.getColumnList().getObjectName(i);
12996            if (col == null) continue;
12997            String n = col.getColumnNameOnly();
12998            if (n == null || n.isEmpty()) continue;
12999            names.add(n);
13000        }
13001        return names.isEmpty() ? null : names;
13002    }
13003
13004    /**
13005     * Slice 64 — look up column names for a FROM-clause relation
13006     * combining the slice-58 base-table catalog and the slice-60
13007     * in-scope CTE/subquery map. Returns {@code null} when neither
13008     * source has column info for the table.
13009     *
13010     * <p>The in-scope map is consulted <b>first</b>: when a CTE or
13011     * FROM-subquery has the same name as a base table in the catalog,
13012     * the scoped definition shadows the catalog (codex diff-review
13013     * round-2 P2 #1 — without this precedence, USING against the CTE
13014     * would see the catalog table's columns and reject a valid join).
13015     *
13016     * <p>Slice 103 — CTEs with an explicit column list are no longer
13017     * rejected upstream. The SELECT-side CTE walker now invokes the
13018     * slice-102 rename helper, so {@code ctePublishedColumns} carries
13019     * the renamed names; {@code addRelationToInScopeMap} reads from
13020     * that map, and this lookup returns the renamed list. (Slice 64's
13021     * older comment said the rename was deferred — that deferral was
13022     * lifted by slice 103.)
13023     */
13024    private static List<String> lookupRelationColumnNames(TTable table,
13025                                                          NameBindingProvider provider) {
13026        String key = effectiveAliasLowerCaseOrNull(table);
13027        if (key != null) {
13028            java.util.Map<String, List<String>> inScope = provider.getInScopeRelationColumns();
13029            if (inScope != null) {
13030                List<String> scoped = inScope.get(key);
13031                if (scoped != null) return scoped;
13032            }
13033        }
13034        return provider.getRelationColumnNames(table);
13035    }
13036
13037    /**
13038     * Slice 66 — accumulated row type of the LEFT side of a top-level
13039     * {@code TJoin}. Maintained per top-level TJoin so that mixed
13040     * ON/CROSS/USING/NATURAL chains can be reasoned about against the
13041     * full visible row type (NATURAL JOIN's right operand sees every
13042     * column visible in the accumulated left, not just the immediate
13043     * prior table).
13044     *
13045     * <p>{@link #complete} flips to {@code false} when any contributor
13046     * along the chain has no resolvable catalog. A {@code false}
13047     * {@code complete} blocks subsequent NATURAL JoinItems from
13048     * inferring their shared-key list — they reject with a tuned
13049     * catalog-required diagnostic naming whichever side(s) lack
13050     * catalog metadata.
13051     */
13052    private static final class LeftOutputState {
13053        final java.util.LinkedHashMap<String, List<TTable>> columns = new java.util.LinkedHashMap<>();
13054        boolean complete = true;
13055        final List<String> missingAliases = new ArrayList<>();
13056
13057        void markMissing(TTable t) {
13058            complete = false;
13059            String alias = effectiveAliasOf(t);
13060            if (alias != null && !alias.isEmpty()) {
13061                if (!missingAliases.contains(alias)) {
13062                    missingAliases.add(alias);
13063                }
13064            }
13065        }
13066    }
13067
13068    /**
13069     * Slice 66 — result of {@link #naturalSharedKeys}. Either a SUCCESS
13070     * (with the inferred key list in left-output insertion order) or
13071     * one of three failure kinds:
13072     *
13073     * <ul>
13074     *   <li>{@code INCOMPLETE_LEFT}: at least one prior contributor on
13075     *       the accumulated left side had null/empty catalog;</li>
13076     *   <li>{@code MISSING_RIGHT}: right table has null/empty catalog;</li>
13077     *   <li>{@code BOTH_MISSING}: both above conditions hold.</li>
13078     * </ul>
13079     *
13080     * <p>Failures carry diagnostic aliases so the caller can produce
13081     * a side-specific reject message.
13082     */
13083    private static final class NaturalKeyResult {
13084        enum Kind { SUCCESS, INCOMPLETE_LEFT, MISSING_RIGHT, BOTH_MISSING }
13085        final Kind kind;
13086        final List<String> keys;
13087        final List<String> leftMissingAliases;
13088        final String rightAlias;
13089
13090        private NaturalKeyResult(Kind kind, List<String> keys,
13091                                 List<String> leftMissingAliases,
13092                                 String rightAlias) {
13093            this.kind = kind;
13094            this.keys = keys;
13095            this.leftMissingAliases = leftMissingAliases;
13096            this.rightAlias = rightAlias;
13097        }
13098        static NaturalKeyResult success(List<String> keys) {
13099            return new NaturalKeyResult(Kind.SUCCESS, keys, null, null);
13100        }
13101        static NaturalKeyResult incompleteLeft(List<String> missing) {
13102            return new NaturalKeyResult(Kind.INCOMPLETE_LEFT, null, missing, null);
13103        }
13104        static NaturalKeyResult missingRight(String alias) {
13105            return new NaturalKeyResult(Kind.MISSING_RIGHT, null, null, alias);
13106        }
13107        static NaturalKeyResult bothMissing(List<String> missing, String alias) {
13108            return new NaturalKeyResult(Kind.BOTH_MISSING, null, missing, alias);
13109        }
13110    }
13111
13112    /**
13113     * Slice 66 — seed the {@link LeftOutputState} with the top-left
13114     * table of a top-level TJoin. Used at the start of each TJoin walk.
13115     */
13116    private static void seedLeftOutput(LeftOutputState state, TTable t,
13117                                       NameBindingProvider provider) {
13118        if (t == null) return;
13119        List<String> cols = lookupRelationColumnNames(t, provider);
13120        if (cols == null || cols.isEmpty()) {
13121            state.markMissing(t);
13122            return;
13123        }
13124        for (String c : cols) {
13125            if (c == null || c.isEmpty()) continue;
13126            String colLC = c.toLowerCase(Locale.ROOT);
13127            List<TTable> contributors = state.columns.get(colLC);
13128            if (contributors == null) {
13129                contributors = new ArrayList<>();
13130                state.columns.put(colLC, contributors);
13131            }
13132            contributors.add(t);
13133        }
13134    }
13135
13136    /**
13137     * Slice 66 — append the right table of an ON-shaped or CROSS
13138     * JoinItem into the running {@link LeftOutputState}. Each catalog
13139     * column is added as a new entry (or extends the contributor list
13140     * for an existing same-named entry). Mirrors
13141     * {@link #seedLeftOutput} but additive.
13142     */
13143    private static void appendRightToLeftOutput(LeftOutputState state, TTable right,
13144                                                NameBindingProvider provider) {
13145        if (right == null) return;
13146        List<String> cols = lookupRelationColumnNames(right, provider);
13147        if (cols == null || cols.isEmpty()) {
13148            state.markMissing(right);
13149            return;
13150        }
13151        for (String c : cols) {
13152            if (c == null || c.isEmpty()) continue;
13153            String colLC = c.toLowerCase(Locale.ROOT);
13154            List<TTable> contributors = state.columns.get(colLC);
13155            if (contributors == null) {
13156                contributors = new ArrayList<>();
13157                state.columns.put(colLC, contributors);
13158            }
13159            contributors.add(right);
13160        }
13161    }
13162
13163    /**
13164     * Slice 66 — merge the right table of a USING-shaped or
13165     * NATURAL-shaped JoinItem. Columns in {@code mergedKeys} are
13166     * appended to the existing same-named contributor list at their
13167     * original output position (no new slot); other columns are
13168     * appended as new entries (or contributed to an existing same-named
13169     * entry — slice-59 plain-vs-plain duplicate admit).
13170     */
13171    private static void mergeRightIntoLeftOutput(LeftOutputState state, TTable right,
13172                                                 NameBindingProvider provider,
13173                                                 List<String> mergedKeys) {
13174        if (right == null) return;
13175        java.util.Set<String> mergedKeysLC = new HashSet<>();
13176        if (mergedKeys != null) {
13177            for (String k : mergedKeys) {
13178                if (k != null && !k.isEmpty()) {
13179                    mergedKeysLC.add(k.toLowerCase(Locale.ROOT));
13180                }
13181            }
13182        }
13183        List<String> cols = lookupRelationColumnNames(right, provider);
13184        if (cols == null || cols.isEmpty()) {
13185            state.markMissing(right);
13186            return;
13187        }
13188        for (String c : cols) {
13189            if (c == null || c.isEmpty()) continue;
13190            String colLC = c.toLowerCase(Locale.ROOT);
13191            // mergedKeysLC.contains(colLC) — append to existing entry;
13192            // !mergedKeysLC.contains(colLC) && state.columns.containsKey(colLC)
13193            //   — append to existing entry (plain-vs-plain duplicate);
13194            // !state.columns.containsKey(colLC) — new entry.
13195            List<TTable> contributors = state.columns.get(colLC);
13196            if (contributors == null) {
13197                contributors = new ArrayList<>();
13198                state.columns.put(colLC, contributors);
13199            }
13200            contributors.add(right);
13201        }
13202    }
13203
13204    /**
13205     * Slice 66 — infer the NATURAL JOIN shared-column list for the
13206     * current JoinItem. Returns one of four results per §6.1 of the
13207     * slice-66 plan. The shared list uses catalog-declared spelling
13208     * from the FIRST contributor that publishes each key (NATURAL has
13209     * no SQL-written key token, so the catalog form is the only
13210     * source of truth).
13211     */
13212    private static NaturalKeyResult naturalSharedKeys(LeftOutputState leftState,
13213                                                     TTable right,
13214                                                     NameBindingProvider provider) {
13215        List<String> rightCols = lookupRelationColumnNames(right, provider);
13216        boolean rightMissing = (rightCols == null || rightCols.isEmpty());
13217        if (!leftState.complete && rightMissing) {
13218            return NaturalKeyResult.bothMissing(leftState.missingAliases,
13219                    effectiveAliasOf(right));
13220        }
13221        if (!leftState.complete) {
13222            return NaturalKeyResult.incompleteLeft(leftState.missingAliases);
13223        }
13224        if (rightMissing) {
13225            return NaturalKeyResult.missingRight(effectiveAliasOf(right));
13226        }
13227        java.util.Set<String> rightLC = new HashSet<>();
13228        for (String c : rightCols) {
13229            if (c != null && !c.isEmpty()) {
13230                rightLC.add(c.toLowerCase(Locale.ROOT));
13231            }
13232        }
13233        List<String> shared = new ArrayList<>();
13234        for (java.util.Map.Entry<String, List<TTable>> e
13235                : leftState.columns.entrySet()) {
13236            String keyLC = e.getKey();
13237            if (rightLC.contains(keyLC)) {
13238                shared.add(firstCatalogSpelling(e.getValue(), keyLC, provider));
13239            }
13240        }
13241        return NaturalKeyResult.success(shared);
13242    }
13243
13244    /**
13245     * Slice 66 — return the catalog-declared spelling of {@code keyLC}
13246     * from the first contributor in insertion order that publishes the
13247     * key with a non-null spelling. Defensive fallback to {@code keyLC}
13248     * if no contributor exposes the spelling (unreachable in practice
13249     * because contributors are catalogued by construction).
13250     */
13251    private static String firstCatalogSpelling(List<TTable> contributors,
13252                                               String keyLC,
13253                                               NameBindingProvider provider) {
13254        if (contributors != null) {
13255            for (TTable t : contributors) {
13256                List<String> cols = lookupRelationColumnNames(t, provider);
13257                if (cols == null) continue;
13258                for (String c : cols) {
13259                    if (c != null && c.equalsIgnoreCase(keyLC)) {
13260                        return c;
13261                    }
13262                }
13263            }
13264        }
13265        return keyLC;
13266    }
13267
13268    /**
13269     * Slice 66 — diagnostic helper. Joins a list of aliases for the
13270     * NATURAL-required catalog reject message.
13271     */
13272    private static String formatAliasList(List<String> aliases) {
13273        if (aliases == null || aliases.isEmpty()) return "<none>";
13274        StringBuilder sb = new StringBuilder();
13275        for (int i = 0; i < aliases.size(); i++) {
13276            if (i > 0) sb.append(", ");
13277            sb.append("'").append(aliases.get(i)).append("'");
13278        }
13279        return sb.toString();
13280    }
13281
13282    /**
13283     * Slice 66 — turn a {@link NaturalKeyResult} failure into a
13284     * structured diagnostic for the gated reject inside
13285     * {@link #buildRelations}.
13286     */
13287    private static String formatNaturalCatalogReject(NaturalKeyResult r) {
13288        switch (r.kind) {
13289            case INCOMPLETE_LEFT:
13290                return "NATURAL JOIN requires catalog metadata for both sides; "
13291                        + "left-side row type is incomplete due to uncatalogued "
13292                        + "relation(s) " + formatAliasList(r.leftMissingAliases)
13293                        + "; supply a TSQLEnv (or in-scope CTE / FROM-subquery "
13294                        + "body) for the missing relation(s), or rewrite as "
13295                        + "JOIN ... ON";
13296            case MISSING_RIGHT:
13297                return "NATURAL JOIN requires catalog metadata for both sides; "
13298                        + "right-side relation '" + r.rightAlias
13299                        + "' has no resolvable column list; supply a TSQLEnv "
13300                        + "(or in-scope CTE / FROM-subquery body) for this "
13301                        + "relation, or rewrite as JOIN ... ON";
13302            case BOTH_MISSING:
13303                return "NATURAL JOIN requires catalog metadata for both sides; "
13304                        + "left-side row type is incomplete due to uncatalogued "
13305                        + "relation(s) " + formatAliasList(r.leftMissingAliases)
13306                        + " and right-side relation '" + r.rightAlias
13307                        + "' also has no resolvable column list; supply a "
13308                        + "TSQLEnv for the missing relation(s), or rewrite "
13309                        + "as JOIN ... ON";
13310            default:
13311                return "NATURAL JOIN: unexpected result kind " + r.kind;
13312        }
13313    }
13314
    /**
     * Slice 65 — fail fast when a JOIN ON clause references a merged
     * key (USING-declared or, since slice 66, NATURAL-inferred) by its
     * bare (unqualified) name. JOIN ON requires per-position scope
     * (only relations BEFORE that JoinItem are visible), which slice 65
     * does not yet model; the merged-key collector applied by other
     * clauses would over-include later relations. The shape is rejected
     * so that a later slice can admit it with proper per-position scope.
     *
     * <p>This is the narrowed replacement for slice-64's
     * {@code rejectUnqualifiedUsingKeyReferences}, which scanned the
     * entire SELECT body. Slice 65 admits unqualified USING-key refs
     * in every other clause via the merged-key collector.
     *
     * <p>Qualified references (e.g. {@code a.k}, {@code b.k}), column
     * references whose names don't match an already-merged key, and
     * references located inside a nested SELECT within the ON condition
     * are unaffected.
     *
     * @param select   the SELECT whose {@code joins} list is walked; when
     *                 {@code select.joins} is {@code null} the method returns
     *                 without checking anything
     * @param provider binding provider forwarded to the left-output helpers
     *                 and to NATURAL shared-key inference
     * @throws SemanticIRBuildException carrying
     *         {@link DiagnosticCode#UNQUALIFIED_MERGED_KEY_IN_JOIN_ON} when an
     *         ON condition contains an unqualified column reference whose
     *         lower-cased name matches a key merged earlier in the same
     *         top-level {@link TJoin}
     */
    private static void rejectUnqualifiedMergedKeyInJoinOn(TSelectSqlStatement select,
                                                           NameBindingProvider provider) {
        if (select.joins == null) return;
        // Walk each TOP-LEVEL TJoin independently — each comma-FROM
        // group has its own scope for JOIN ON purposes (codex slice-65
        // diff-review round-4 P2 #1). Within one TJoin, walk JoinItems
        // in FROM order and track which merged keys (USING-declared OR
        // NATURAL-inferred) have been established. An ON clause is only
        // checked against keys that are ALREADY merged at that position;
        // a bare `k` in an ON before any USING(k) / NATURAL is just
        // resolver2's unqualified-binding case.
        //
        // Slice 66: NATURAL JoinItems contribute their catalog-inferred
        // key list to declaredKeysSoFar. When NATURAL would fail the
        // catalog requirement (INCOMPLETE_LEFT / MISSING_RIGHT /
        // BOTH_MISSING), the preflight silently skips recording this
        // JoinItem's keys — the gated reject in buildRelations will
        // fire with a catalog-required diagnostic and the user sees
        // that error first.
        //
        // Identity skip set: USING-clause own TObjectNames are
        // declarations not references; never matched against the
        // declaredKeysSoFar set since the preflight only walks ON
        // conditions. Kept as a defensive no-op.
        final java.util.Set<TObjectName> skip =
                java.util.Collections.newSetFromMap(
                        new java.util.IdentityHashMap<TObjectName, Boolean>());
        for (int j = 0; j < select.joins.size(); j++) {
            TJoin top = select.joins.getJoin(j);
            if (top == null) continue;
            TJoinItemList items = top.getJoinItems();
            if (items == null) continue;
            // Reset per top-level TJoin so independent comma-FROM
            // groups don't poison each other's ON clauses.
            final java.util.Set<String> declaredKeysSoFar = new java.util.HashSet<>();
            // Accumulated left row type for this TJoin, seeded from its
            // leading table; NATURAL inference below reads it.
            LeftOutputState leftState = new LeftOutputState();
            seedLeftOutput(leftState, top.getTable(), provider);
            for (int i = 0; i < items.size(); i++) {
                TJoinItem item = items.getJoinItem(i);
                if (item == null) continue;
                // Check ON FIRST (uses scope BEFORE this JoinItem), then
                // record this JoinItem's USING/NATURAL declarations so
                // future siblings see them.
                TExpression onCond = item.getOnCondition();
                if (onCond != null && !declaredKeysSoFar.isEmpty()) {
                    // Snapshot of the keys merged so far; the anonymous
                    // visitor below captures this copy rather than the
                    // live set that is mutated further down.
                    final java.util.Set<String> alreadyDeclared =
                            new java.util.HashSet<>(declaredKeysSoFar);
                    onCond.acceptChildren(new TParseTreeVisitor() {
                        // Number of nested SELECTs currently open in the
                        // traversal; names seen while it is > 0 are ignored.
                        int nestedSelectDepth = 0;

                        @Override
                        public void preVisit(TSelectSqlStatement nested) {
                            nestedSelectDepth++;
                        }

                        @Override
                        public void postVisit(TSelectSqlStatement nested) {
                            nestedSelectDepth--;
                        }

                        @Override
                        public void preVisit(TObjectName node) {
                            if (nestedSelectDepth > 0) return;
                            if (skip.contains(node)) return;
                            // Only column references are candidates.
                            if (node.getDbObjectType() != EDbObjectType.column) return;
                            String name = node.getColumnNameOnly();
                            if (name == null || name.isEmpty() || "*".equals(name)) return;
                            // Keys are stored lower-cased, so compare lower-cased.
                            if (!alreadyDeclared.contains(name.toLowerCase(Locale.ROOT))) return;
                            // A missing/empty table qualifier marks the
                            // reference as bare; qualified references pass.
                            String qualifier = node.getTableString();
                            if (qualifier == null || qualifier.isEmpty()) {
                                throw new SemanticIRBuildException(
                                        Diagnostic.error(DiagnosticCode.UNQUALIFIED_MERGED_KEY_IN_JOIN_ON,
                                        "unqualified reference to merged key '"
                                                + name + "' inside a JOIN ON condition "
                                                + "is deferred to a future slice "
                                                + "(per-position scope semantics needed); "
                                                + "qualify with a table alias "
                                                + "(e.g. a." + name + ") to disambiguate", null));
                            }
                        }
                    });
                }
                // Record this JoinItem's contribution to declaredKeysSoFar
                // and update leftState for NATURAL's accumulated-left
                // semantics.
                TTable rightTable = item.getTable();
                TObjectNameList usingCols = item.getUsingColumns();
                if (usingCols != null && usingCols.size() > 0) {
                    // USING: every named key becomes merged from here on.
                    List<String> usingKeyNames = new ArrayList<>(usingCols.size());
                    for (int k = 0; k < usingCols.size(); k++) {
                        TObjectName n = usingCols.getObjectName(k);
                        if (n == null) continue;
                        skip.add(n);
                        String name = n.getColumnNameOnly();
                        if (name != null && !name.isEmpty()) {
                            declaredKeysSoFar.add(name.toLowerCase(Locale.ROOT));
                            usingKeyNames.add(name);
                        }
                    }
                    if (rightTable != null) {
                        mergeRightIntoLeftOutput(leftState, rightTable, provider, usingKeyNames);
                    }
                } else if (isNaturalJoinType(item.getJoinType()) && rightTable != null) {
                    // NATURAL: keys are inferred against the accumulated left side.
                    NaturalKeyResult r = naturalSharedKeys(leftState, rightTable, provider);
                    if (r.kind == NaturalKeyResult.Kind.SUCCESS) {
                        for (String s : r.keys) {
                            if (s != null && !s.isEmpty()) {
                                declaredKeysSoFar.add(s.toLowerCase(Locale.ROOT));
                            }
                        }
                        mergeRightIntoLeftOutput(leftState, rightTable, provider, r.keys);
                    } else {
                        // Catalog-required reject fires upstream in
                        // buildRelations. Defensively append for state
                        // consistency.
                        appendRightToLeftOutput(leftState, rightTable, provider);
                    }
                } else if (rightTable != null) {
                    // Any other join form: no merged keys, just extend the left side.
                    appendRightToLeftOutput(leftState, rightTable, provider);
                }
            }
        }
    }
13456
13457    /**
13458     * Slice 65 — compute the {@link UsingScope} for the current SELECT
13459     * body from its FROM-clause USING joins. Walks every {@link TJoin}
13460     * in {@code select.joins} and for each USING(k) JoinItem, builds
13461     * the per-key equivalence class via DSU-like union over prior
13462     * relations + the right-side relation. Then materializes each
13463     * class by a separate FROM-order pass with identity dedup so
13464     * chained USING joins (`a JOIN b USING(k) JOIN c USING(k)`)
13465     * produce {@code [a, b, c]}, never duplicates.
13466     *
13467     * <p>For each class, builds a {@link UsingScope.MergedKeyEntry}
13468     * with FROM-ordered merged source refs (one per relation that
13469     * publishes the key per catalog / in-scope map; unknown-metadata
13470     * priors emit refs unconditionally, matching slice-64's over-
13471     * approximation policy in {@link #populateUsingJoinRefs}).
13472     *
13473     * <p>Ambiguity is precomputed:
13474     * <ul>
13475     *   <li>{@code entries.size() > 1}: two disconnected USING classes
13476     *       share the same key name.</li>
13477     *   <li>{@code entries.size() == 1} AND a FROM relation outside
13478     *       the class has catalog metadata that declares the key:
13479     *       out-of-class same-named column.</li>
13480     * </ul>
13481     *
13482     * <p>Returns {@link UsingScope#EMPTY} when no USING clauses are
13483     * present in {@code select.joins}.
13484     */
13485    private static UsingScope buildUsingScope(TSelectSqlStatement select,
13486                                              NameBindingProvider provider) {
13487        if (select.joins == null) return UsingScope.EMPTY;
13488        // Slice 86 — delegate to the shared TJoinList-taking helper so
13489        // joined UPDATE (slice 86 buildUpdateUsingScope) can reuse the
13490        // identical scope-build pipeline.
13491        return buildUsingScopeFromJoinList(select.joins, provider);
13492    }
13493
13494    /**
13495     * Slice 86 — compute the {@link UsingScope} for a joined UPDATE's
13496     * FROM clause via {@code update.getJoins()}. Mirrors slice-65
13497     * {@link #buildUsingScope}: USING / NATURAL JoinItems contribute
13498     * merged-key equivalence classes; unqualified merged-key references
13499     * in SET RHS / WHERE / RETURNING resolve to the merged source list.
13500     *
13501     * <p>Returns {@link UsingScope#EMPTY} when no USING/NATURAL JoinItems
13502     * appear in the FROM clause.
13503     */
13504    private static UsingScope buildUpdateUsingScope(TUpdateSqlStatement update,
13505                                                    NameBindingProvider provider) {
13506        if (update == null) return UsingScope.EMPTY;
13507        return buildUsingScopeFromJoinList(update.getJoins(), provider);
13508    }
13509
    /**
     * Slice 86 — shared {@link UsingScope} computation extracted from
     * slice-65 {@link #buildUsingScope}. Takes the {@link TJoinList}
     * directly so it can be invoked from both SELECT
     * ({@link #buildUsingScope}) and joined UPDATE
     * ({@link #buildUpdateUsingScope}).
     *
     * <p>The computation runs in three passes:
     * <ol>
     *   <li><b>Pass 1</b> walks each top-level {@link TJoin} and, for
     *       every USING JoinItem or successfully inferred NATURAL
     *       JoinItem, grows one chain of relations per key: prior
     *       relations (those with no column metadata, or whose metadata
     *       lists the key) followed by the right-side relation. Chains
     *       are kept per top-level TJoin, so disconnected comma-FROM
     *       groups yield separate components for the same key. A
     *       {@link LeftOutputState} is maintained alongside so NATURAL
     *       JoinItems infer their keys against the accumulated left row
     *       type.</li>
     *   <li><b>Pass 2</b> turns each non-empty component into a
     *       {@link UsingScope.EquivalenceClass} plus a
     *       {@link UsingScope.MergedKeyEntry} whose source refs are
     *       emitted at most once per effective alias, for members that
     *       either have no column metadata or are known to publish the
     *       key.</li>
     *   <li><b>Pass 3</b> precomputes ambiguity per key: more than one
     *       entry for the key, or a FROM relation outside the single
     *       class whose known columns include the key.</li>
     * </ol>
     *
     * @param joins    the FROM-clause join list; may be {@code null}
     * @param provider binding provider forwarded to the column-lookup,
     *                 left-output and NATURAL-inference helpers
     * @return {@link UsingScope#EMPTY} when {@code joins} is {@code null},
     *         when no JoinItem contributes a merged key, or when no
     *         component yields any source ref; otherwise a new
     *         {@link UsingScope} holding the per-key entries and the
     *         per-key ambiguity messages
     */
    private static UsingScope buildUsingScopeFromJoinList(TJoinList joins,
                                                          NameBindingProvider provider) {
        if (joins == null) return UsingScope.EMPTY;
        // Pass 1: per-key DSU. For each USING(k) or NATURAL JoinItem,
        // union the prior relations PUBLISHING the key (catalog-narrowed
        // per codex slice-66 round-1 P1 #1) with the right-side relation,
        // scoped to the enclosing top-level TJoin (chained merges within
        // one TJoin transitively connect through DSU). Disconnected
        // top-level TJoins (comma-FROM) keep their own per-key components.
        // Slice 66 maintains a LeftOutputState alongside the loop so
        // NATURAL JoinItems can infer their shared-key list against the
        // accumulated left row type.
        //
        // Shape: lower-cased key -> list of components, one component
        // (FROM-ordered relation list) per top-level TJoin that merges
        // the key. LinkedHashMap keeps keys in first-seen order.
        java.util.Map<String, java.util.List<java.util.List<TTable>>> perKeyComponents =
                new java.util.LinkedHashMap<>();
        // Track the SQL-written spelling of each merged key (the first
        // occurrence in FROM order). For USING keys this is the
        // SQL-written USING-clause case (slice-64 contract); for
        // NATURAL keys this is the catalog-declared spelling from the
        // first contributor.
        java.util.Map<String, String> originalSpellingByKey = new java.util.HashMap<>();
        for (int jx = 0; jx < joins.size(); jx++) {
            TJoin top = joins.getJoin(jx);
            if (top == null) continue;
            TJoinItemList items = top.getJoinItems();
            if (items == null) continue;
            TTable topTable = top.getTable();
            // Slice 66: per-TJoin LeftOutputState for NATURAL inference.
            LeftOutputState leftState = new LeftOutputState();
            seedLeftOutput(leftState, topTable, provider);
            // Per-key in-progress chain for THIS top-level TJoin.
            java.util.Map<String, java.util.List<TTable>> inProgressByKey =
                    new java.util.HashMap<>();
            for (int i = 0; i < items.size(); i++) {
                TJoinItem item = items.getJoinItem(i);
                if (item == null) continue;
                TTable rightTable = item.getTable();
                if (rightTable == null) continue;

                // Determine merged keys for this JoinItem and the
                // emitted spelling per key. Three cases:
                //   USING:   keys = syntactic usingCols; spelling = USING-clause text.
                //   NATURAL: keys = catalog intersection (when SUCCESS);
                //            spelling = catalog spelling.
                //   ON/CROSS/other: skip — append to leftState only.
                List<String> keyNames;
                java.util.Map<String, String> spellingByKeyLC = new java.util.HashMap<>();
                TObjectNameList usingCols = item.getUsingColumns();
                if (usingCols != null && usingCols.size() > 0) {
                    keyNames = new java.util.ArrayList<>(usingCols.size());
                    for (int k = 0; k < usingCols.size(); k++) {
                        TObjectName keyNode = usingCols.getObjectName(k);
                        if (keyNode == null) continue;
                        String keyName = keyNode.getColumnNameOnly();
                        if (keyName == null || keyName.isEmpty()) continue;
                        keyNames.add(keyName);
                        spellingByKeyLC.put(keyName.toLowerCase(Locale.ROOT), keyName);
                    }
                } else if (isNaturalJoinType(item.getJoinType())) {
                    NaturalKeyResult r = naturalSharedKeys(leftState, rightTable, provider);
                    if (r.kind != NaturalKeyResult.Kind.SUCCESS) {
                        // Catalog-required reject already fired (or will
                        // fire) inside buildRelations. Defensively skip
                        // this JoinItem in the scope build; it does NOT
                        // contribute to the merged-key scope.
                        appendRightToLeftOutput(leftState, rightTable, provider);
                        continue;
                    }
                    keyNames = r.keys;
                    for (String s : keyNames) {
                        if (s != null && !s.isEmpty()) {
                            spellingByKeyLC.put(s.toLowerCase(Locale.ROOT), s);
                        }
                    }
                } else {
                    // ON / CROSS / other — no merged-key contribution.
                    appendRightToLeftOutput(leftState, rightTable, provider);
                    continue;
                }

                // Prior relations for this JoinItem in FROM order:
                // topTable + items[0..i-1].getTable().
                java.util.List<TTable> priorRelations = new java.util.ArrayList<>();
                if (topTable != null) priorRelations.add(topTable);
                for (int p = 0; p < i; p++) {
                    TJoinItem prev = items.getJoinItem(p);
                    if (prev != null && prev.getTable() != null) {
                        priorRelations.add(prev.getTable());
                    }
                }
                for (String keyName : keyNames) {
                    if (keyName == null || keyName.isEmpty()) continue;
                    String keyLC = keyName.toLowerCase(Locale.ROOT);
                    // Record the first emitted spelling we see for this
                    // key. USING uses SQL-written spelling; NATURAL uses
                    // catalog-declared spelling.
                    if (!originalSpellingByKey.containsKey(keyLC)) {
                        originalSpellingByKey.put(keyLC, spellingByKeyLC.get(keyLC));
                    }
                    // Fetch (or lazily create) this TJoin's chain for the key.
                    java.util.List<TTable> chain = inProgressByKey.get(keyLC);
                    if (chain == null) {
                        chain = new java.util.ArrayList<>();
                        inProgressByKey.put(keyLC, chain);
                    }
                    // Slice 66 catalog-narrowed union (codex round-1 P1 #1):
                    // for each prior relation, include in this key's
                    // equivalence class only if (a) catalog is unknown
                    // (over-approximate; slice-64 fallback), or (b)
                    // catalog declares the key. Skip if catalog is
                    // known and the key is proven absent.
                    for (TTable prior : priorRelations) {
                        // Identity dedup: a relation already on the chain
                        // (from an earlier JoinItem) is not added twice.
                        if (containsByIdentity(chain, prior)) continue;
                        List<String> priorCols = lookupRelationColumnNames(prior, provider);
                        if (priorCols == null) {
                            chain.add(prior);
                            continue;
                        }
                        boolean priorPublishes = false;
                        for (String pc : priorCols) {
                            if (pc != null && pc.equalsIgnoreCase(keyLC)) {
                                priorPublishes = true;
                                break;
                            }
                        }
                        if (priorPublishes) {
                            chain.add(prior);
                        }
                    }
                    // The right-side relation always joins the chain,
                    // regardless of its column metadata.
                    if (!containsByIdentity(chain, rightTable)) {
                        chain.add(rightTable);
                    }
                }
                // After the merged-key bookkeeping, merge right into
                // the leftState so subsequent NATURAL JoinItems see
                // the accumulated row type.
                mergeRightIntoLeftOutput(leftState, rightTable, provider, keyNames);
            }
            // Flush this TJoin's in-progress chains as one component
            // per key.
            for (java.util.Map.Entry<String, java.util.List<TTable>> e :
                    inProgressByKey.entrySet()) {
                java.util.List<java.util.List<TTable>> bucket = perKeyComponents.get(e.getKey());
                if (bucket == null) {
                    bucket = new java.util.ArrayList<>();
                    perKeyComponents.put(e.getKey(), bucket);
                }
                bucket.add(e.getValue());
            }
        }
        if (perKeyComponents.isEmpty()) return UsingScope.EMPTY;
        // Pass 2: materialize EquivalenceClass + MergedKeyEntry per
        // component. FROM-order is already preserved by Pass 1's
        // accumulation order (priorRelations + rightTable).
        java.util.Map<String, java.util.List<UsingScope.MergedKeyEntry>> entriesByName =
                new java.util.LinkedHashMap<>();
        for (java.util.Map.Entry<String, java.util.List<java.util.List<TTable>>> e :
                perKeyComponents.entrySet()) {
            String keyLC = e.getKey();
            // ColumnRef-emit spelling: the spelling recorded in Pass 1
            // (USING-clause text for USING keys, the NATURAL result's
            // spelling for NATURAL keys). Falls back to keyLC if no
            // spelling was recorded (defensive).
            String emitKeyName = originalSpellingByKey.containsKey(keyLC)
                    ? originalSpellingByKey.get(keyLC)
                    : keyLC;
            java.util.List<UsingScope.MergedKeyEntry> entries = new java.util.ArrayList<>();
            for (java.util.List<TTable> componentMembers : e.getValue()) {
                if (componentMembers.isEmpty()) continue;
                UsingScope.EquivalenceClass cls = new UsingScope.EquivalenceClass(
                        keyLC, componentMembers);
                java.util.List<ColumnRef> sources = new java.util.ArrayList<>();
                // Lower-cased aliases that already produced a source ref
                // for this component; prevents duplicate refs per alias.
                java.util.Set<String> seenAliases = new java.util.HashSet<>();
                for (TTable t : componentMembers) {
                    String effAlias = effectiveAliasOf(t);
                    // Members without an effective alias cannot be
                    // referenced and therefore emit no source ref.
                    if (effAlias == null || effAlias.isEmpty()) continue;
                    String aliasKey = effAlias.toLowerCase(Locale.ROOT);
                    if (seenAliases.contains(aliasKey)) continue;
                    java.util.List<String> cols = lookupRelationColumnNames(t, provider);
                    if (cols == null) {
                        // Metadata-unknown: emit ref (over-approximate).
                        sources.add(new ColumnRef(effAlias, emitKeyName));
                        seenAliases.add(aliasKey);
                        continue;
                    }
                    // Metadata known: emit a ref only if the key is listed.
                    for (String c : cols) {
                        if (c != null && c.equalsIgnoreCase(keyLC)) {
                            sources.add(new ColumnRef(effAlias, emitKeyName));
                            seenAliases.add(aliasKey);
                            break;
                        }
                    }
                }
                // A component that produced no source ref is dropped.
                if (!sources.isEmpty()) {
                    entries.add(new UsingScope.MergedKeyEntry(cls, sources));
                }
            }
            if (!entries.isEmpty()) {
                entriesByName.put(keyLC, entries);
            }
        }
        if (entriesByName.isEmpty()) return UsingScope.EMPTY;
        // Pass 3: precompute ambiguity per key.
        // NOTE(review): both messages below say "USING(...)" even when the
        // key was contributed by a NATURAL JoinItem — confirm that wording
        // is intended.
        java.util.Map<String, String> ambiguityByName = new java.util.HashMap<>();
        java.util.List<TTable> allFromRelations = walkAllFromRelationsFromJoinList(joins);
        for (java.util.Map.Entry<String, java.util.List<UsingScope.MergedKeyEntry>> e :
                entriesByName.entrySet()) {
            String keyLC = e.getKey();
            java.util.List<UsingScope.MergedKeyEntry> entries = e.getValue();
            if (entries.size() > 1) {
                ambiguityByName.put(keyLC,
                        "multiple disconnected USING(" + keyLC + ") equivalence "
                                + "classes appear in this FROM (their merged "
                                + "columns share the same key name)");
                continue;
            }
            // Single class. Walk all FROM relations; if any out-of-class
            // relation is catalog-known to publish the key, mark ambiguous.
            UsingScope.EquivalenceClass cls = entries.get(0).getEquivClass();
            // Identity-based membership test for the class's relations.
            java.util.IdentityHashMap<TTable, Boolean> inClass = new java.util.IdentityHashMap<>();
            for (TTable m : cls.getMembers()) inClass.put(m, Boolean.TRUE);
            for (TTable r : allFromRelations) {
                if (inClass.containsKey(r)) continue;
                // Slice 65 diff-review round-3 P2 #2 (slice 103 update):
                // post-slice-103 both branches return the same data —
                // `lookupRelationColumnNames` consults the in-scope map
                // populated from `ctePublishedColumns`, which now holds
                // the renamed names. The discriminator is retained as a
                // defense-in-depth path for any call site that bypasses
                // the SELECT-side CTE walker but still wants to detect
                // renamed-key collisions; the slice-103 path falls into
                // the `lookupRelationColumnNames` branch and gets the
                // same renamed list.
                java.util.List<String> cols;
                if (hasExplicitCteColumnList(r)) {
                    cols = explicitCteColumnNames(r);
                } else {
                    cols = lookupRelationColumnNames(r, provider);
                }
                if (cols == null) continue;          // unknown → trust writer
                for (String c : cols) {
                    if (c != null && c.equalsIgnoreCase(keyLC)) {
                        String outAlias = effectiveAliasOf(r);
                        ambiguityByName.put(keyLC,
                                "the USING(" + keyLC + ") merged column collides "
                                        + "with column '" + keyLC + "' on relation '"
                                        + (outAlias != null ? outAlias : "<unnamed>")
                                        + "' which is not part of the USING equivalence class");
                        break;
                    }
                }
                // One colliding relation is enough; stop scanning for this key.
                if (ambiguityByName.containsKey(keyLC)) break;
            }
        }
        return new UsingScope(entriesByName, ambiguityByName);
    }
13777
13778    private static boolean containsByIdentity(java.util.List<TTable> list, TTable t) {
13779        for (TTable x : list) {
13780            if (x == t) return true;
13781        }
13782        return false;
13783    }
13784
13785    /**
13786     * Slice 65 — every FROM-clause relation reachable directly from
13787     * {@code select.joins} (every {@code top.getTable()} + every
13788     * {@code joinItem.getTable()}). Used by {@link #buildUsingScope}
13789     * to detect out-of-equivalence-class same-named columns.
13790     */
13791    private static java.util.List<TTable> walkAllFromRelations(TSelectSqlStatement select) {
13792        if (select == null) return new java.util.ArrayList<>();
13793        return walkAllFromRelationsFromJoinList(select.joins);
13794    }
13795
13796    /**
13797     * Slice 86 — shared {@link TJoinList}-taking walker for ambiguity
13798     * detection inside {@link #buildUsingScopeFromJoinList}. Used by both
13799     * SELECT ({@link #walkAllFromRelations}) and joined UPDATE
13800     * ({@link #buildUpdateUsingScope}).
13801     */
13802    private static java.util.List<TTable> walkAllFromRelationsFromJoinList(TJoinList joins) {
13803        java.util.List<TTable> out = new java.util.ArrayList<>();
13804        if (joins == null) return out;
13805        for (int j = 0; j < joins.size(); j++) {
13806            TJoin top = joins.getJoin(j);
13807            if (top == null) continue;
13808            if (top.getTable() != null) out.add(top.getTable());
13809            TJoinItemList items = top.getJoinItems();
13810            if (items == null) continue;
13811            for (int i = 0; i < items.size(); i++) {
13812                TJoinItem item = items.getJoinItem(i);
13813                if (item != null && item.getTable() != null) {
13814                    out.add(item.getTable());
13815                }
13816            }
13817        }
13818        return out;
13819    }
13820
13821    /**
13822     * Slice 64 — true iff any TJoinItem in {@code select.joins}
13823     * carries a non-empty USING list. Used by {@link #tryExpandStar}
13824     * to defer bare {@code *} over USING JOIN to S65 (merged-key
13825     * output naming).
13826     */
13827    private static boolean hasUsingInFromClause(TSelectSqlStatement select) {
13828        if (select.joins == null) return false;
13829        for (int j = 0; j < select.joins.size(); j++) {
13830            TJoin top = select.joins.getJoin(j);
13831            if (top == null) continue;
13832            TJoinItemList items = top.getJoinItems();
13833            if (items == null) continue;
13834            for (int i = 0; i < items.size(); i++) {
13835                TJoinItem item = items.getJoinItem(i);
13836                if (item != null
13837                        && item.getUsingColumns() != null
13838                        && item.getUsingColumns().size() > 0) {
13839                    return true;
13840                }
13841            }
13842        }
13843        return false;
13844    }
13845
13846    /**
13847     * Slice 66 — true iff any JoinItem is NATURAL AND its inferred
13848     * shared-column list is non-empty (catalog-resolved on both sides
13849     * and the intersection contains at least one column).
13850     *
13851     * <p>Routes bare {@code *} expansion through
13852     * {@link #expandBareStarOverUsing} when NATURAL contributes merged
13853     * keys. NATURAL with empty intersection or with INCOMPLETE_LEFT /
13854     * MISSING_RIGHT / BOTH_MISSING returns false for THIS JoinItem
13855     * (codex slice-66 round-4 P2 #3) — the bare-* path then falls
13856     * through to per-relation expansion, which is correct for an
13857     * empty-intersection NATURAL (Cartesian, no dedup needed). The
13858     * catalog-required reject for NATURAL fires upstream inside
13859     * {@link #buildRelations} before bare-* runs.
13860     *
13861     * <p>The walk maintains its own per-top-level-TJoin
13862     * {@link LeftOutputState} (so NATURAL inference against accumulated
13863     * left works the same way as in {@link #buildUsingScope} and
13864     * {@link #buildRelations}).
13865     */
13866    private static boolean hasNaturalJoinMergedKeysInFromClause(
13867            TSelectSqlStatement select, NameBindingProvider provider) {
13868        if (select.joins == null) return false;
13869        for (int j = 0; j < select.joins.size(); j++) {
13870            TJoin top = select.joins.getJoin(j);
13871            if (top == null) continue;
13872            TJoinItemList items = top.getJoinItems();
13873            if (items == null) continue;
13874            LeftOutputState leftState = new LeftOutputState();
13875            seedLeftOutput(leftState, top.getTable(), provider);
13876            for (int i = 0; i < items.size(); i++) {
13877                TJoinItem item = items.getJoinItem(i);
13878                if (item == null) continue;
13879                TTable rightTable = item.getTable();
13880                if (rightTable == null) continue;
13881                if (isNaturalJoinType(item.getJoinType())) {
13882                    NaturalKeyResult r = naturalSharedKeys(leftState, rightTable, provider);
13883                    if (r.kind == NaturalKeyResult.Kind.SUCCESS
13884                            && r.keys != null && !r.keys.isEmpty()) {
13885                        return true;
13886                    }
13887                    appendRightToLeftOutput(leftState, rightTable, provider);
13888                    continue;
13889                }
13890                TObjectNameList usingCols = item.getUsingColumns();
13891                if (usingCols != null && usingCols.size() > 0) {
13892                    List<String> usingKeyNames = new ArrayList<>(usingCols.size());
13893                    for (int k = 0; k < usingCols.size(); k++) {
13894                        TObjectName usingKey = usingCols.getObjectName(k);
13895                        if (usingKey == null) continue;
13896                        String keyName = usingKey.getColumnNameOnly();
13897                        if (keyName != null && !keyName.isEmpty()) {
13898                            usingKeyNames.add(keyName);
13899                        }
13900                    }
13901                    mergeRightIntoLeftOutput(leftState, rightTable, provider, usingKeyNames);
13902                } else {
13903                    appendRightToLeftOutput(leftState, rightTable, provider);
13904                }
13905            }
13906        }
13907        return false;
13908    }
13909
13910    /**
13911     * Two relations sharing the same effective alias would make
13912     * {@link ColumnRef#getRelationAlias()} ambiguous in the IR. Resolver2
13913     * may already flag column references in this case, but the IR-level
13914     * invariant still needs to hold.
13915     */
13916    private static void rejectDuplicateAliases(List<RelationSource> relations) {
13917        Set<String> seen = new HashSet<>();
13918        for (RelationSource r : relations) {
13919            if (!seen.add(r.getAlias())) {
13920                throw new SemanticIRBuildException(
13921                        Diagnostic.error(DiagnosticCode.DUPLICATE_RELATION_ALIAS,
13922                        "duplicate relation alias '" + r.getAlias()
13923                                + "' is not supported (would make ColumnRef ambiguous)", null));
13924            }
13925        }
13926    }
13927
13928    private static RelationSource buildRelation(TTable table, NameBindingProvider provider,
13929                                                boolean allowFromSubqueries) {
13930        // Reject FROM-subqueries when the caller did not extract them as
13931        // separate statements. After slice 18 the still-uncovered scopes
13932        // are scalar bodies (slice-11 boundary), set-op branches (slice-16
13933        // boundary), and set-op CTE bodies (build()'s set-op CTE dispatch
13934        // passes allowFromSubqueries=false to each branch).
13935        if (table.getTableType() == gudusoft.gsqlparser.ETableSource.subquery
13936                && !allowFromSubqueries) {
13937            // Slice 74: use effectiveAliasOf so anonymous subqueries
13938            // surface their synth name in the diagnostic instead of the
13939            // empty-string the prior `getAliasName() == null` ternary
13940            // produced.
13941            String bodyAlias = effectiveAliasOf(table);
13942            throw new SemanticIRBuildException(
13943                    Diagnostic.error(DiagnosticCode.FROM_SUBQUERY_IN_BODY_CONTEXT_NOT_SUPPORTED,
13944                    "FROM-clause subquery '" + (bodyAlias == null || bodyAlias.isEmpty() ? "<anonymous>" : bodyAlias)
13945                            + "' inside a scalar body, set-op branch, or set-op CTE body is not supported yet", table));
13946        }
13947        RelationBinding binding = provider.bindRelation(table);
13948        if (binding == null) {
13949            throw new SemanticIRBuildException(
13950                    Diagnostic.error(DiagnosticCode.TABLE_BINDING_UNRESOLVED,
13951                    "could not bind table " + safeName(table) + " (only base tables and in-scope CTEs are supported)", table));
13952        }
13953        // Effective alias: prefer the SQL-written alias, then the slice-74
13954        // synthetic alias for anonymous FROM-subqueries, then the table
13955        // name (mirrors effectiveAliasOf so RelationSource.alias and
13956        // ColumnRef.relationAlias stay aligned).
13957        String alias = effectiveAliasOf(table);
13958        return new RelationSource(alias, binding);
13959    }
13960
13961    private static String safeName(TTable t) {
13962        try {
13963            return t.getName();
13964        } catch (RuntimeException e) {
13965            return "<unnamed>";
13966        }
13967    }
13968
13969    // -----------------------------------------------------------------
13970    // Slice 58 / 59 — catalog-backed SELECT * expansion.
13971    //
13972    // The hook in buildOutputColumns calls tryExpandStar(rc, select,
13973    // provider, isPredicateBody, stmtName) for any result column whose
13974    // columnNameOnly is "*" (and as defense in depth for any
13975    // EExpressionType.list_t expression). tryExpandStar returns a
13976    // StarExpansionResult that is either EXPANDED (with a list of
13977    // OutputColumns) or one of several reasoned rejection kinds. The
13978    // hook then either appends the expanded columns or throws a
13979    // structured SemanticIRBuildException whose message is unique per
13980    // kind so external callers can pattern-match without parsing a
13981    // generic "not supported yet" string.
13982    //
13983    // Scope (slice 58):
13984    //   - single base-table FROM (1 join, no join items)
13985    //   - bare `*` or qualified `t.*`
13986    //   - catalog provided via NameBindingProvider#getRelationColumnNames
13987    //
13988    // Slice 59 lift:
13989    //   - multi-relation FROM is now supported when a single top-level
13990    //     TJoin carries one or more explicit JOIN clauses (joinItems).
13991    //     Each FROM relation must individually satisfy slice-58 rules
13992    //     (binding kind TABLE, catalog declares columns). Bare `*`
13993    //     concatenates per-relation expansions in FROM order; qualified
13994    //     `t.*` selects the one relation whose effective alias matches.
13995    //   - qualifier matching is now effective-alias only
13996    //     (alias if present, else table name) — case-insensitive. This
13997    //     unifies the rule across single- and multi-relation paths;
13998    //     `SELECT employees.* FROM employees e` rejects because the
13999    //     effective alias is `e`, not `employees`.
14000    //   - star expansion is rejected inside synthetic body contexts
14001    //     (scalar-subquery / set-op-branch / predicate-subquery) via
14002    //     SYNTHETIC_BODY_CONTEXT; the slice-58 path silently allowed
14003    //     this for catalog-equipped builds even though a multi-column
14004    //     expansion would corrupt scalar-body shape downstream.
14005    //
14006    // Slice 60 lift:
14007    //   - CTE star and FROM-subquery star (`a.*` and bare `*` over a CTE
14008    //     or FROM-clause subquery alias) are now supported via the
14009    //     in-scope-relation-columns map carried on the provider. The
14010    //     map is populated at each consuming-SELECT call site in build()
14011    //     and extractFromSubqueriesAsStatements before the consumer's
14012    //     buildOutputColumns runs; tryExpandStar reads it for CTE /
    //     SUBQUERY bindings. At slice 60, explicit CTE column lists
    //     (`WITH a(x, y) AS ...`) stayed rejected because the CTE body's
    //     StatementGraph publishes inner-projection names, not the
    //     explicit list, and emitLineageForStatement would point at
    //     non-existent body outputs. Lifting that path needed either
    //     body-output renaming or a published-name → body-name lineage
    //     map; it was deferred at the time and has since been lifted in
    //     slice 103 (see the CTE branch of expandSingleRelation).
14020    //
14021    // Slice 62 lift:
14022    //   - Comma-FROM (multiple top-level TJoin elements parsed from
14023    //     `FROM a, b, c`) is now admitted at the outer / CTE-body /
14024    //     FROM-subquery-body call sites. {@link #tryExpandStar} walks
14025    //     every top-level TJoin and accumulates relations in FROM
14026    //     order; bare `*` concatenates per-relation expansions and
14027    //     qualified `t.*` selects the matching effective-alias.
14028    //     Synthetic body contexts (scalar / set-op-branch / set-op-CTE
14029    //     / predicate) still reject comma-FROM via the gated reject
14030    //     in buildRelations and the slice-62 reject inside
14031    //     preflightExistsInnerShape.
14032    //
    // Out of scope as of slice 60:
    //   - SELECT * EXCEPT/REPLACE (BigQuery extensions; no slice scheduled)
    //   - Explicit CTE column list star expansion (deferred to slice 61+;
    //     since lifted in slice 103, see expandSingleRelation)
14036    // -----------------------------------------------------------------
14037
    /**
     * Outcome of a star-expansion attempt: {@link #EXPANDED} on success,
     * otherwise the reason the expansion was rejected. Carried on
     * {@link StarExpansionResult#kind}.
     */
    enum StarExpansionKind {
        // Success: the result carries the expanded OutputColumn list.
        EXPANDED,
        // Returned by tryExpandStar when the caller flags the statement
        // as a predicate body.
        PREDICATE_BODY_GUARD,
        // Defensive catch-all for malformed FROM lists: missing top-level
        // TJoin, null table on a top-level TJoin or a join item, or
        // empty {@code select.joins}. Slice 62 made comma-FROM admit
        // here (the walk iterates every top-level TJoin), so reaching
        // this kind indicates a parse-tree anomaly rather than a comma-
        // FROM rejection.
        MULTI_RELATION_FROM,
        // Returned by expandSingleRelation when the provider yields no
        // binding for the relation, or the binding kind is not TABLE,
        // CTE or SUBQUERY.
        NON_BASE_TABLE_RELATION,
        // Qualified `t.*` whose qualifier matches no FROM relation's
        // effective alias (case-insensitive comparison).
        QUALIFIER_NOT_FOUND,
        // Slice 59: a qualifier matches 2+ relations (case-insensitive
        // effective-alias collision). Real SQL never reaches this case
        // unless `rejectDuplicateAliases` permitted a case-only collision
        // (that check compares aliases case-sensitively).
        QUALIFIER_AMBIGUOUS,
        // TABLE binding for which the provider's getRelationColumnNames
        // returned null or an empty list.
        NO_CATALOG_OR_UNKNOWN_TABLE,
        // Slice 59: star expansion in synthetic body contexts
        // (scalar-subquery, set-op-branch, predicate-subquery) is rejected
        // because multi-column expansion would violate the body's shape
        // contract (e.g. scalar bodies must project exactly one column).
        SYNTHETIC_BODY_CONTEXT,
        // Slice 60: CTE has an explicit column list
        // (`WITH a(x, y) AS ...`). Deferred to a future slice because
        // the CTE body's StatementGraph publishes inner-projection
        // names, not the explicit list, and lineage emission cannot
        // bridge that without either body-output renaming or a
        // published-name → body-name map.
        // NOTE(review): the slice-103 comment in expandSingleRelation
        // says explicit column lists are no longer rejected there —
        // confirm whether any path still produces this kind.
        EXPLICIT_CTE_COLUMN_LIST_DEFERRED,
        // Slice 60: CTE / SUBQUERY binding's published-column map
        // lookup returned null or empty. This indicates a builder
        // invariant failure (the body should have been built and
        // registered before the consumer's buildOutputColumns runs);
        // user SQL cannot reach this kind under normal builds — only
        // fabricated providers or a missed plumbing path would. The
        // diagnostic names the binding kind and qualified name so
        // regressions are loud, not silent.
        NO_INSCOPE_RELATION_COLUMNS
    }
14078
14079    static final class StarExpansionResult {
14080        final StarExpansionKind kind;
14081        final List<OutputColumn> columns;
14082        final String qualifier;
14083        final String detail;
14084
14085        private StarExpansionResult(StarExpansionKind kind,
14086                                    List<OutputColumn> columns,
14087                                    String qualifier,
14088                                    String detail) {
14089            this.kind = kind;
14090            this.columns = columns;
14091            this.qualifier = qualifier;
14092            this.detail = detail;
14093        }
14094
14095        static StarExpansionResult expanded(List<OutputColumn> cols) {
14096            return new StarExpansionResult(StarExpansionKind.EXPANDED, cols, null, null);
14097        }
14098
14099        static StarExpansionResult reject(StarExpansionKind kind) {
14100            return new StarExpansionResult(kind, null, null, null);
14101        }
14102
14103        static StarExpansionResult reject(StarExpansionKind kind, String qualifier, String detail) {
14104            return new StarExpansionResult(kind, null, qualifier, detail);
14105        }
14106    }
14107
14108    /**
14109     * Effective alias for a FROM-clause {@link TTable}: the SQL-written
14110     * alias if present, else the slice-74 synthetic alias for unaliased
14111     * FROM-subquery TTables (position-keyed via
14112     * {@link FromSubqueryNaming#synthAliasFor}), else the table name.
14113     * Mirrors the rule used by {@link #buildRelation} so
14114     * {@link ColumnRef#getRelationAlias()} stays aligned with what the
14115     * lineage emitter expects.
14116     */
14117    private static String effectiveAliasOf(TTable t) {
14118        if (t == null) return null;
14119        String alias = t.getAliasName();
14120        if (alias != null && !alias.isEmpty()) return alias;
14121        if (t.getTableType() == gudusoft.gsqlparser.ETableSource.subquery) {
14122            return FromSubqueryNaming.synthAliasFor(t);
14123        }
14124        return t.getName();
14125    }
14126
14127    /**
14128     * Slice 58 / 59 — attempt to expand a {@code SELECT *} or
14129     * {@code SELECT alias.*} result column using the catalog exposed via
14130     * {@link NameBindingProvider#getRelationColumnNames(TTable)}.
14131     *
14132     * <p>Slice 58 supported only single-base-table FROM. Slice 59 lifts
14133     * the multi-relation case to JOIN forms (single top-level TJoin with
14134     * explicit JOIN clauses). Comma-FROM stays rejected by
14135     * {@code buildRelations}.
14136     *
14137     * <p>Returns {@link StarExpansionKind#EXPANDED} with one
14138     * {@link OutputColumn} per catalog-declared column on success.
14139     * Otherwise returns a reasoned rejection so the caller can throw a
14140     * shape-specific {@link SemanticIRBuildException}.
14141     */
14142    private static StarExpansionResult tryExpandStar(TResultColumn rc,
14143                                                     TSelectSqlStatement select,
14144                                                     NameBindingProvider provider,
14145                                                     boolean isPredicateBody,
14146                                                     String stmtName) {
14147        if (isPredicateBody) {
14148            // Defensive: the active rejection lives in
14149            // preflightExistsInnerShape (~line 3880) and fires before
14150            // this code runs. Slice-24 EXISTS-with-* tests pin that path.
14151            return StarExpansionResult.reject(StarExpansionKind.PREDICATE_BODY_GUARD);
14152        }
14153        // Slice 59: reject star expansion in synthetic body contexts.
14154        // Scalar-subquery bodies must project exactly one column;
14155        // set-op-branch bodies must keep per-branch column-count parity;
14156        // predicate-subquery bodies are constant or column-ref shapes
14157        // (slice 23/24/27). Multi-column expansion would corrupt all
14158        // three. The preflight at SemanticIRBuilder.java:1007 only
14159        // checks AST result-column count/name, not "*", so without this
14160        // guard a catalog-equipped scalar body `SELECT * FROM small`
14161        // would silently emit multiple OutputColumns.
14162        if (stmtName != null
14163                && (isScalarSyntheticName(stmtName)
14164                        || isSetOpBranchSyntheticName(stmtName)
14165                        || isPredicateSubquerySyntheticName(stmtName))) {
14166            return StarExpansionResult.reject(
14167                    StarExpansionKind.SYNTHETIC_BODY_CONTEXT, null,
14168                    "star expansion is not supported inside synthetic body '"
14169                            + stmtName + "' (scalar, set-op branch, or predicate body)");
14170        }
14171        // Extract qualifier (empty string for bare `*`, alias/name for `t.*`).
14172        String qualifier = "";
14173        TExpression expr = rc.getExpr();
14174        if (expr != null && expr.getObjectOperand() != null) {
14175            String q = expr.getObjectOperand().getTableString();
14176            if (q != null && !q.isEmpty()) {
14177                qualifier = q;
14178            }
14179        }
14180        // FROM-clause shape gate. Slice 59 supported a single top-level
14181        // TJoin with zero or more explicit JOIN clauses. Slice 62 lifts
14182        // comma-FROM to multi-TJoin: walk every top-level TJoin in
14183        // {@code select.joins} (a comma-FROM list parses as multiple
14184        // top-level TJoins) and accumulate every relation in FROM order.
14185        if (select.joins == null || select.joins.size() == 0) {
14186            return StarExpansionResult.reject(StarExpansionKind.MULTI_RELATION_FROM);
14187        }
14188        List<TTable> fromRelations = new ArrayList<>();
14189        for (int j = 0; j < select.joins.size(); j++) {
14190            TJoin topJoin = select.joins.getJoin(j);
14191            if (topJoin == null) {
14192                return StarExpansionResult.reject(StarExpansionKind.MULTI_RELATION_FROM);
14193            }
14194            TTable leftTable = topJoin.getTable();
14195            if (leftTable == null) {
14196                return StarExpansionResult.reject(StarExpansionKind.MULTI_RELATION_FROM);
14197            }
14198            fromRelations.add(leftTable);
14199            TJoinItemList items = topJoin.getJoinItems();
14200            if (items == null) continue;
14201            for (int i = 0; i < items.size(); i++) {
14202                TJoinItem item = items.getJoinItem(i);
14203                TTable rightTable = item.getTable();
14204                if (rightTable == null) {
14205                    return StarExpansionResult.reject(StarExpansionKind.MULTI_RELATION_FROM);
14206                }
14207                fromRelations.add(rightTable);
14208            }
14209        }
14210        // Slice 65 / 66: bare `*` over a USING / NATURAL JOIN collapses
14211        // merged keys. For each FROM relation in order, walk catalog/
14212        // in-scope columns; emit one OutputColumn per merged key (sources
14213        // = merged ref list) and one per non-merged column. Qualified
14214        // `t.*` is unaffected (single-relation path, no merged-key dedup).
14215        if (qualifier.isEmpty()
14216                && (hasUsingInFromClause(select)
14217                        || hasNaturalJoinMergedKeysInFromClause(select, provider))) {
14218            return expandBareStarOverUsing(select, provider, fromRelations);
14219        }
14220        // Qualified `t.*`: pick the (unique) FROM relation whose
14221        // effective alias matches the qualifier (case-insensitive).
14222        // Effective alias = `alias != null && !alias.isEmpty() ? alias :
14223        // tableName`, matching buildRelation at line 5649. Slice 58's
14224        // alias-OR-name match (line 5785 before slice 59) is replaced
14225        // here so `SELECT employees.* FROM employees e` rejects
14226        // (qualifier=`employees` ≠ effective alias `e`), consistent
14227        // with standard SQL correlation-name semantics.
14228        if (!qualifier.isEmpty()) {
14229            List<TTable> matches = new ArrayList<>();
14230            for (TTable t : fromRelations) {
14231                String ea = effectiveAliasOf(t);
14232                if (ea != null && ea.equalsIgnoreCase(qualifier)) {
14233                    matches.add(t);
14234                }
14235            }
14236            if (matches.isEmpty()) {
14237                return StarExpansionResult.reject(
14238                        StarExpansionKind.QUALIFIER_NOT_FOUND, qualifier, null);
14239            }
14240            if (matches.size() > 1) {
14241                StringBuilder names = new StringBuilder();
14242                for (int i = 0; i < matches.size(); i++) {
14243                    if (i > 0) names.append(", ");
14244                    names.append(effectiveAliasOf(matches.get(i)));
14245                }
14246                return StarExpansionResult.reject(
14247                        StarExpansionKind.QUALIFIER_AMBIGUOUS, qualifier,
14248                        "matches " + matches.size() + " FROM-clause relations: "
14249                                + names);
14250            }
14251            return expandSingleRelation(matches.get(0), provider, qualifier);
14252        }
14253        // Bare `*`: expand every FROM relation in order. Fail fast on
14254        // the first relation that does not satisfy the slice-58 rules
14255        // (binding kind TABLE, catalog declares columns); the caller
14256        // sees the per-relation rejection kind and detail. No partial
14257        // outputs are returned.
14258        List<OutputColumn> all = new ArrayList<>();
14259        for (TTable t : fromRelations) {
14260            StarExpansionResult one = expandSingleRelation(t, provider, "");
14261            if (one.kind != StarExpansionKind.EXPANDED) {
14262                return one;
14263            }
14264            all.addAll(one.columns);
14265        }
14266        return StarExpansionResult.expanded(all);
14267    }
14268
14269    /**
14270     * Slice 58 / 59 — pure per-relation star expander. Applies the
14271     * base-table-only + catalog rules and builds one
14272     * {@link OutputColumn} per catalog-declared column with a
14273     * {@link ColumnRef} whose {@code relationAlias} is the effective
14274     * alias of {@code target}. Returns {@link StarExpansionKind#EXPANDED}
14275     * on success, otherwise a tuned rejection.
14276     *
14277     * <p>The {@code qualifier} parameter is the SQL-written qualifier
14278     * for qualified `t.*` (empty for bare `*`); it is plumbed back into
14279     * the rejection result so the caller can include it in
14280     * user-visible diagnostics.
14281     */
14282    private static StarExpansionResult expandSingleRelation(TTable target,
14283                                                            NameBindingProvider provider,
14284                                                            String qualifier) {
14285        // The TTable's tableType cannot distinguish CTE from base table —
14286        // CTE references arrive as ETableSource.objectname. Use the
14287        // provider's bindRelation to get the resolved RelationKind.
14288        RelationBinding binding = provider.bindRelation(target);
14289        if (binding == null) {
14290            String diagAlias = effectiveAliasOf(target);
14291            return StarExpansionResult.reject(
14292                    StarExpansionKind.NON_BASE_TABLE_RELATION, qualifier,
14293                    "FROM source '"
14294                            + (diagAlias != null ? diagAlias : "<unnamed>")
14295                            + "' could not be bound (only base tables, in-scope CTEs, and FROM-subqueries are supported)");
14296        }
14297        RelationKind kind = binding.getKind();
14298        if (kind == RelationKind.TABLE) {
14299            // Slice 58 catalog-backed path. Returns null when no
14300            // catalog, when the catalog doesn't declare this table,
14301            // or when the table has no columns.
14302            List<String> columnNames = provider.getRelationColumnNames(target);
14303            if (columnNames == null || columnNames.isEmpty()) {
14304                String diagAlias = effectiveAliasOf(target);
14305                return StarExpansionResult.reject(
14306                        StarExpansionKind.NO_CATALOG_OR_UNKNOWN_TABLE, qualifier,
14307                        diagAlias);
14308            }
14309            return buildExpansionFromColumnNames(target, columnNames);
14310        }
14311        if (kind == RelationKind.CTE) {
14312            // Slice 60 + Slice 103: key by EFFECTIVE ALIAS in the consuming
14313            // SELECT, not by CTE name. This avoids a collision when a
14314            // FROM-subquery alias equals a visible CTE name and the
14315            // CTE is referenced under a different alias (codex
14316            // diff-review): `WITH a AS (...) SELECT c.*, a.* FROM a c
14317            // JOIN (SELECT ...) a ON ...` — both 'a' (CTE) and 'a'
14318            // (subquery alias) live in the FROM clause; effective
14319            // aliases are 'c' and 'a' respectively, so per-relation
14320            // entries cannot overwrite each other.
14321            //
14322            // Slice 103 — explicit CTE column lists (WITH a(x, y) AS ...)
14323            // are no longer rejected here. The slice-102 rename helper now
14324            // runs on the SELECT-side CTE walker too; the in-scope map
14325            // populated by addRelationToInScopeMap reads from
14326            // ctePublishedColumns, which the helper has populated with the
14327            // renamed names. Star expansion just falls through to the
14328            // in-scope lookup below; the renamed list comes back.
14329            String lookupKey = effectiveAliasLowerCaseOrNull(target);
14330            List<String> cteColumns = (lookupKey == null) ? null
14331                    : provider.getInScopeRelationColumns().get(lookupKey);
14332            if (cteColumns == null || cteColumns.isEmpty()) {
14333                String diagAlias = effectiveAliasOf(target);
14334                return StarExpansionResult.reject(
14335                        StarExpansionKind.NO_INSCOPE_RELATION_COLUMNS,
14336                        qualifier,
14337                        "CTE '" + diagAlias + "' has no published columns in "
14338                                + "the in-scope map (builder invariant: the "
14339                                + "CTE body should have been built and "
14340                                + "registered before this consumer ran)");
14341            }
14342            return buildExpansionFromColumnNames(target, cteColumns);
14343        }
14344        if (kind == RelationKind.SUBQUERY) {
14345            // Slice 60: same effective-alias keying as the CTE branch
14346            // above (codex diff-review). For a subquery the effective
14347            // alias IS the alias the SQL writer wrote (preflight
14348            // rejects anonymous subqueries), so this branch is also
14349            // unambiguous under the alias-collision example.
14350            String lookupKey = effectiveAliasLowerCaseOrNull(target);
14351            List<String> subColumns = (lookupKey == null) ? null
14352                    : provider.getInScopeRelationColumns().get(lookupKey);
14353            if (subColumns == null || subColumns.isEmpty()) {
14354                String diagAlias = effectiveAliasOf(target);
14355                return StarExpansionResult.reject(
14356                        StarExpansionKind.NO_INSCOPE_RELATION_COLUMNS,
14357                        qualifier,
14358                        "FROM-clause subquery '"
14359                                + (diagAlias != null ? diagAlias : "<unnamed>")
14360                                + "' has no published columns in the in-scope "
14361                                + "map (builder invariant: the subquery body "
14362                                + "should have been extracted and registered "
14363                                + "before this consumer ran)");
14364            }
14365            return buildExpansionFromColumnNames(target, subColumns);
14366        }
14367        // OUTER_REFERENCE / UNION / UNKNOWN: keep the slice-58 / 59
14368        // rejection contract. None of these arrive on a slice-60
14369        // FROM-clause relation via the current builder paths
14370        // (OUTER_REFERENCE bindings live only on RelationSource for
14371        // correlated scalar lookup; UNION is a set-op branch concept,
14372        // not a FROM-clause relation). Defensive catch-all.
14373        String diagAlias = effectiveAliasOf(target);
14374        String detail;
14375        switch (kind) {
14376            case OUTER_REFERENCE:
14377                detail = "OUTER_REFERENCE star expansion is not supported (relation '"
14378                        + diagAlias + "')";
14379                break;
14380            case UNION:
14381            case UNKNOWN:
14382            default:
14383                detail = "FROM source '" + diagAlias
14384                        + "' must be a base table, CTE, or FROM-subquery (got kind="
14385                        + kind + ")";
14386                break;
14387        }
14388        return StarExpansionResult.reject(
14389                StarExpansionKind.NON_BASE_TABLE_RELATION, qualifier, detail);
14390    }
14391
14392    /**
14393     * Slice 60 — shared helper that turns a column-name list into the
14394     * star-expansion OutputColumn list with one {@link ColumnRef} per
14395     * column whose {@code relationAlias} is the effective alias of the
14396     * target table (alias if present, else the table name). Used by all
14397     * three slice-58 / 59 / 60 paths.
14398     */
14399    private static StarExpansionResult buildExpansionFromColumnNames(
14400            TTable target, List<String> columnNames) {
14401        String alias = effectiveAliasOf(target);
14402        List<OutputColumn> outputs = new ArrayList<>(columnNames.size());
14403        for (String colName : columnNames) {
14404            ColumnRef ref = new ColumnRef(alias, colName);
14405            outputs.add(new OutputColumn(
14406                    colName,
14407                    /*derived=*/ false,
14408                    /*aggregate=*/ false,
14409                    Collections.singletonList(ref),
14410                    /*windowSpec=*/ null));
14411        }
14412        return StarExpansionResult.expanded(outputs);
14413    }
14414
14415    /**
14416     * Slice 65 — bare {@code *} over a USING JOIN: deduplicate the
14417     * merged key within each equivalence class. For each FROM relation
14418     * in order:
14419     * <ul>
14420     *   <li>look up columns via the existing
14421     *       {@link #lookupRelationColumnNames} (catalog + in-scope map);
14422     *       reject with {@link StarExpansionKind#NO_CATALOG_OR_UNKNOWN_TABLE}
14423     *       when null (we can't dedup without knowing what's there);</li>
14424     *   <li>for each column, check
14425     *       {@link UsingScope#entryContaining(String, TTable)};
14426     *       if a class contains this relation, emit a single
14427     *       merged-source {@link OutputColumn} the first time the class
14428     *       is seen and skip duplicates from later class members;</li>
14429     *   <li>otherwise emit a plain single-source OutputColumn.</li>
14430     * </ul>
14431     *
14432     * <p>Duplicate-output guard fires ONLY when the conflicting names
14433     * involve a USING-merged entry (merged-vs-plain or two disconnected
14434     * merged classes for the same key). Plain duplicates from
14435     * non-USING multi-relation expansion remain admitted (slice-59
14436     * behavior).
14437     *
14438     * <p>Output column order is <b>left-table order with USING-key
14439     * dedup within each equivalence class</b>: the merged column
14440     * appears at the position of its first member in FROM order. E.g.
14441     * {@code a(id, k), b(k, name)} → {@code [id, k, name]}.
14442     *
14443     * <p>This is INTENTIONALLY DIFFERENT from the ANSI/PostgreSQL
14444     * physical column order (which puts USING columns first, then
14445     * remaining left, then remaining right — would yield
14446     * {@code [k, id, name]}). The Semantic IR is not a query
14447     * executor; the order it surfaces is a lineage-tracking
14448     * presentation choice. Left-table order:
14449     * <ol>
14450     *   <li>matches the slice-65 roadmap resume protocol
14451     *       ({@code docs/designs/sql-semantic-governance-unified-roadmap.md}
14452     *       §13.1) which fixed this order before implementation;</li>
14453     *   <li>keeps the merged column physically adjacent to its
14454     *       left-side neighbors, matching how lineage tooling
14455     *       traditionally renders combined JOIN output;</li>
14456     *   <li>does not depend on USING-clause ordering (which is a
14457     *       syntactic choice, not a semantic one).</li>
14458     * </ol>
14459     * Codex diff-review round 5 flagged this as P2 (non-ANSI). The
14460     * choice was confirmed in plan-review and is locked by
14461     * {@code bareStarOverUsingLeftPositionPreserved} so any future
14462     * change to ANSI order is a deliberate observable contract
14463     * change, not a silent fix.
14464     */
14465    private static StarExpansionResult expandBareStarOverUsing(
14466            TSelectSqlStatement select,
14467            NameBindingProvider provider,
14468            List<TTable> fromRelations) {
14469        UsingScope scope = provider.getUsingScope();
14470        if (scope.isEmpty()) {
14471            // The slice-65 caller checks hasUsingInFromClause before
14472            // routing here, so an empty scope here means buildUsingScope
14473            // computed empty entries (unreachable in practice). Fall
14474            // back to a generic reject so the caller surfaces a
14475            // structured diagnostic.
14476            return StarExpansionResult.reject(
14477                    StarExpansionKind.SYNTHETIC_BODY_CONTEXT, null,
14478                    "bare * over JOIN ... USING reached the merged-key expander "
14479                            + "with an empty UsingScope (builder invariant failure)");
14480        }
14481        LinkedHashSet<String> emittedNamesLC = new LinkedHashSet<>();
14482        Set<String> mergedNamesLC = new HashSet<>();
14483        java.util.IdentityHashMap<UsingScope.EquivalenceClass, Boolean> emittedClasses =
14484                new java.util.IdentityHashMap<>();
14485        List<OutputColumn> outputs = new ArrayList<>();
14486        for (TTable t : fromRelations) {
14487            // Slice 103 lifted the explicit-CTE-column-list deferral:
14488            // populateUsingJoinRefs no longer rejects, and
14489            // lookupRelationColumnNames returns the renamed names from
14490            // the in-scope map (populated by the slice-102 rename
14491            // helper that the SELECT-side CTE walker now invokes).
14492            List<String> cols = lookupRelationColumnNames(t, provider);
14493            if (cols == null) {
14494                return StarExpansionResult.reject(
14495                        StarExpansionKind.NO_CATALOG_OR_UNKNOWN_TABLE, null,
14496                        effectiveAliasOf(t));
14497            }
14498            for (String c : cols) {
14499                if (c == null) continue;
14500                String keyLC = c.toLowerCase(Locale.ROOT);
14501                UsingScope.MergedKeyEntry entry = scope.entryContaining(keyLC, t);
14502                if (entry != null) {
14503                    // USING-merged column. Dedup per class.
14504                    if (emittedClasses.containsKey(entry.getEquivClass())) {
14505                        continue;
14506                    }
14507                    emittedClasses.put(entry.getEquivClass(), Boolean.TRUE);
14508                    OutputColumn cand = new OutputColumn(
14509                            c, /*derived=*/ false, /*aggregate=*/ false,
14510                            entry.getSources(), /*windowSpec=*/ null);
14511                    StarExpansionResult dup = appendMergedAwareOrReject(
14512                            outputs, emittedNamesLC, mergedNamesLC, cand, /*isMerged=*/ true);
14513                    if (dup != null) return dup;
14514                } else {
14515                    // Plain column. Slice-59 behavior: duplicate plain
14516                    // names are admitted. Codex round-5: the merged-aware
14517                    // guard fires only when ONE side is merged.
14518                    OutputColumn cand = new OutputColumn(
14519                            c, /*derived=*/ false, /*aggregate=*/ false,
14520                            Collections.singletonList(new ColumnRef(effectiveAliasOf(t), c)),
14521                            /*windowSpec=*/ null);
14522                    StarExpansionResult dup = appendMergedAwareOrReject(
14523                            outputs, emittedNamesLC, mergedNamesLC, cand, /*isMerged=*/ false);
14524                    if (dup != null) return dup;
14525                }
14526            }
14527        }
14528        return StarExpansionResult.expanded(outputs);
14529    }
14530
14531    /**
14532     * Slice 65 — duplicate-output helper for
14533     * {@link #expandBareStarOverUsing}. Fires the merged-vs-non-merged
14534     * collision guard. Returns a {@link StarExpansionResult} when the
14535     * caller should reject; returns {@code null} when the candidate is
14536     * appended successfully.
14537     */
14538    private static StarExpansionResult appendMergedAwareOrReject(
14539            List<OutputColumn> outputs,
14540            LinkedHashSet<String> emittedNamesLC,
14541            Set<String> mergedNamesLC,
14542            OutputColumn cand,
14543            boolean isMerged) {
14544        String nameLC = cand.getName().toLowerCase(Locale.ROOT);
14545        boolean alreadyEmitted = emittedNamesLC.contains(nameLC);
14546        boolean alreadyMerged = mergedNamesLC.contains(nameLC);
14547        // Reject only when at least one side is a USING-merged entry.
14548        // Plain-vs-plain duplicates remain admitted (slice-59 behavior).
14549        if (alreadyEmitted && (isMerged || alreadyMerged)) {
14550            return StarExpansionResult.reject(
14551                    StarExpansionKind.SYNTHETIC_BODY_CONTEXT, null,
14552                    "bare * over JOIN ... USING produces ambiguous output "
14553                            + "column '" + cand.getName() + "': a USING-merged "
14554                            + "entry and a same-named column from outside the "
14555                            + "USING equivalence class collide (or two disconnected "
14556                            + "USING classes share the same key name); qualify "
14557                            + "with t.* per relation or rename a column to "
14558                            + "disambiguate");
14559        }
14560        emittedNamesLC.add(nameLC);
14561        if (isMerged) mergedNamesLC.add(nameLC);
14562        outputs.add(cand);
14563        return null;
14564    }
14565
14566    /**
14567     * Slice 60 — read the published column names for an already-built
14568     * statement (CTE body or FROM-subquery body) from its
14569     * {@link StatementGraph#getOutputColumns()}. Used by {@code build()}
14570     * and {@code extractFromSubqueriesAsStatements} to populate the
14571     * in-scope map before each consuming SELECT's
14572     * {@code buildOutputColumns} runs.
14573     */
14574    private static List<String> outputColumnNames(StatementGraph body) {
14575        List<OutputColumn> cols = body.getOutputColumns();
14576        List<String> names = new ArrayList<>(cols.size());
14577        for (OutputColumn c : cols) names.add(c.getName());
14578        return Collections.unmodifiableList(names);
14579    }
14580
14581    /**
14582     * Slice 60 — effective alias of a TTable lower-cased, or null when
14583     * the table has neither an alias nor a name. The alias-collision
14584     * fix (codex diff-review) replaced CTE-name / subquery-alias
14585     * keying with effective-alias keying; this helper centralises the
14586     * lookup-key computation.
14587     */
14588    private static String effectiveAliasLowerCaseOrNull(TTable t) {
14589        String alias = effectiveAliasOf(t);
14590        if (alias == null || alias.isEmpty()) return null;
14591        return alias.toLowerCase(Locale.ROOT);
14592    }
14593
14594    /**
14595     * Slice 60 — build a per-consumer effective-alias-keyed map of
14596     * "FROM-clause relation alias → published column names" by walking
14597     * the consumer's direct FROM/JOIN list (single top-level TJoin,
14598     * left table + each joinItem.getTable()).
14599     *
14600     * <p>Each CTE-bound relation contributes its effective alias →
14601     * {@code ctePublishedColumns.get(cteName.toLowerCase())}. Each
14602     * FROM-subquery contributes its alias → {@code
14603     * outputColumnNames(stmts.get(subqueryAliasToIndex.get(alias)))}.
14604     * Base-table relations are skipped because their star expansion
14605     * uses the catalog path (TSQLEnv); adding them here would force
14606     * dialect-specific catalog walks before catalog access is required.
14607     *
14608     * <p>The codex diff-review found that a single name-keyed map
14609     * collides when a FROM-subquery alias equals a visible CTE name
14610     * (`WITH a AS (...) ... FROM a c JOIN (SELECT ...) a ...`). Keying
14611     * by effective alias (which is unique per FROM clause —
14612     * {@link #preflightDirectFromList} rejects duplicates) closes the
14613     * collision class.
14614     *
14615     * @param consumer              the SELECT whose FROM list to walk
14616     * @param consumerProvider      provider used only for bindRelation
14617     *                              (CTE vs TABLE discrimination)
14618     * @param ctePublishedColumns   CTE-name → columns lookup
14619     *                              populated as CTE bodies are built
14620     * @param subqueryAliasToIndex  this consumer's own subquery alias
14621     *                              → stmts index lookup
14622     * @param stmts                 already-built statement list
14623     * @return mutable effective-alias-keyed in-scope map for this
14624     *         consumer; callers wrap it via
14625     *         {@code provider.withInScopeRelationColumns(map)}
14626     */
14627    private static Map<String, List<String>> buildEffectiveAliasInScopeMap(
14628            TSelectSqlStatement consumer,
14629            NameBindingProvider consumerProvider,
14630            Map<String, List<String>> ctePublishedColumns,
14631            Map<String, Integer> subqueryAliasToIndex,
14632            List<StatementGraph> stmts) {
14633        Map<String, List<String>> result = new HashMap<>();
14634        if (consumer.joins == null) return result;
14635        for (TJoin join : consumer.joins) {
14636            addRelationToInScopeMap(join.getTable(), consumerProvider,
14637                    ctePublishedColumns, subqueryAliasToIndex, stmts, result);
14638            TJoinItemList items = join.getJoinItems();
14639            if (items == null) continue;
14640            for (int i = 0; i < items.size(); i++) {
14641                TJoinItem item = items.getJoinItem(i);
14642                if (item == null) continue;
14643                addRelationToInScopeMap(item.getTable(), consumerProvider,
14644                        ctePublishedColumns, subqueryAliasToIndex, stmts, result);
14645            }
14646        }
14647        return result;
14648    }
14649
14650    private static void addRelationToInScopeMap(
14651            TTable t,
14652            NameBindingProvider consumerProvider,
14653            Map<String, List<String>> ctePublishedColumns,
14654            Map<String, Integer> subqueryAliasToIndex,
14655            List<StatementGraph> stmts,
14656            Map<String, List<String>> result) {
14657        if (t == null) return;
14658        String key = effectiveAliasLowerCaseOrNull(t);
14659        if (key == null) return;
14660        if (t.getTableType() == gudusoft.gsqlparser.ETableSource.subquery) {
14661            Integer idx = subqueryAliasToIndex.get(key);
14662            if (idx != null) {
14663                result.put(key, outputColumnNames(stmts.get(idx)));
14664            }
14665            return;
14666        }
14667        // objectname (base-table OR CTE reference). Use bindRelation
14668        // to discriminate; base tables don't need an in-scope entry
14669        // (slice 58 catalog path handles them via getRelationColumnNames).
14670        RelationBinding b = consumerProvider.bindRelation(t);
14671        if (b == null) return;
14672        if (b.getKind() == RelationKind.CTE) {
14673            String cteName = t.getName();
14674            if (cteName == null) return;
14675            List<String> cols = ctePublishedColumns.get(cteName.toLowerCase(Locale.ROOT));
14676            if (cols != null && !cols.isEmpty()) {
14677                result.put(key, cols);
14678            }
14679        }
14680        // For TABLE, OUTER_REFERENCE, UNION, UNKNOWN bindings the
14681        // in-scope map is intentionally not populated; the
14682        // base-table catalog path or rejection path applies.
14683    }
14684
14685    /**
14686     * Build the {@link OutputColumn} list. Slice 4 lifts the
14687     * simple-object-name / single-source restriction: any expression with at
14688     * least one column reference is accepted, and the column is marked
14689     * {@link OutputColumn#isDerived()} when the expression is anything
14690     * other than a direct column reference. Slice 61 also admits
14691     * canonical constant-only projections (zero column refs) outside
14692     * scalar-subquery bodies, using alias-or-expression text naming.
14693     */
14694    private static List<OutputColumn> buildOutputColumns(TSelectSqlStatement select,
14695                                                         NameBindingProvider provider,
14696                                                         boolean allowScalarProjectionSubqueries,
14697                                                         boolean allowWindowProjection,
14698                                                         boolean isPredicateBody,
14699                                                         String stmtName) {
14700        TResultColumnList rcl = select.getResultColumnList();
14701        if (rcl == null || rcl.size() == 0) {
14702            throw new SemanticIRBuildException(Diagnostic.error(DiagnosticCode.SELECT_NO_PROJECTED_COLUMNS, "SELECT has no projected columns", select));
14703        }
14704        // Slice 23/24/27: predicate-body short-circuit. The preflight
14705        // (§4.4 / slice-24 §4.1.1 / slice-27 §4.1) already validated that
14706        // the inner SELECT projects exactly one column, of an admitted
14707        // shape — constant (slice 23), simple column ref (slice 24), or
14708        // expression / function call / CASE / aggregate over inner
14709        // columns (slice 27). Discriminate on the shape:
14710        //
14711        // - Constant: bypass the regular result-column loop (which would
14712        //   reject empty-source non-aggregate projections via the
14713        //   "no column refs" guard at line ~4397) and emit one synthetic
14714        //   OutputColumn with empty sources. The synthesised name
14715        //   `<predicate_subquery_<i>>_const_0` guarantees no collision
14716        //   with real column names.
14717        //
14718        // - Slice-24 column ref (simple_object_name_t with name): fall
14719        //   through to the normal loop; effectiveOutputName(rc) returns
14720        //   the column name. OutputColumn carries name, derived=false,
14721        //   aggregate=false, sources=[ColumnRef(...)].
14722        //
14723        // - Slice-27 expression / function / CASE / aggregate without
14724        //   alias: synthesise the OutputColumn here. The normal loop's
14725        //   {@link #effectiveOutputName} would throw on rc with neither
14726        //   alias nor column name (a slice-6 invariant for OUTER
14727        //   projections); for predicate bodies the OutputColumn name is
14728        //   internal scaffolding only — no consumer references it
14729        //   externally — so a synthetic name is sound. For aggregate-over-
14730        //   constants (COUNT(*), SUM(1)) sources is empty and aggregate=true
14731        //   matches the line-4397 guard's intent. The slice-24 projector
14732        //   pass walks OutputColumn.sources to base-column terminals and
14733        //   emits JOIN canonical edges (zero terminals → zero edges,
14734        //   multi-source → multiple edges).
14735        if (isPredicateBody) {
14736            TResultColumn rc0 = rcl.getResultColumn(0);
14737            if (rc0.getExpr() != null && isConstantExpression(rc0.getExpr())) {
14738                String synthName = (stmtName != null ? stmtName : "<predicate_subquery_?>")
14739                        + "_const_0";
14740                return Collections.singletonList(new OutputColumn(
14741                        synthName, /*derived=*/ true, /*aggregate=*/ false,
14742                        Collections.<ColumnRef>emptyList(), /*windowSpec=*/ null));
14743            }
14744            // Slice 27 + Slice 32: synthesise the OutputColumn for any
14745            // slice-27/31-admitted predicate-body projection EXCEPT the
14746            // slice-24 simple_object_name_t shape. Slice 27 fired this
14747            // branch only when both alias AND columnNameOnly were absent
14748            // (missingName=true); slice 32 widens it to also fire when
14749            // alias is present, so aliased Oracle / MSSQL plain
14750            // {@code LISTAGG(x.id, ',') WITHIN GROUP (ORDER BY ...) AS lst}
14751            // is admitted (the slice-31 boundary lifted by slice 32).
14752            //
14753            // The simple_object_name_t exclusion is intentional. That
14754            // shape MUST keep falling through to the normal loop, where
14755            // {@link #effectiveOutputName} returns the column name (or
14756            // alias if present), {@code derived=false}, and
14757            // {@code sources=[ColumnRef(...)]} — the slice-24 baseline.
14758            // Per {@code TResultColumn.getColumnNameOnly()}, only
14759            // {@code simple_object_name_t}, {@code typecast_t}, and
14760            // {@code sqlserver_proprietary_column_alias_t} populate
14761            // columnNameOnly; function_t / case_t / pure-binary all
14762            // return empty, so the cascade below is alias > _proj_0
14763            // (no columnNameOnly intermediate).
14764            if (rc0.getExpr() != null
14765                    && rc0.getExpr().getExpressionType() != EExpressionType.simple_object_name_t) {
14766                String alias = rc0.getColumnAlias();
14767                String name;
14768                if (alias != null && !alias.isEmpty()) {
14769                    // Slice 32 widening: aliased projection. Use the
14770                    // alias as the OutputColumn name.
14771                    name = alias;
14772                } else {
14773                    // Slice 27 carryover: unaliased non-column-ref
14774                    // projection. Synthesise a stable name. The synth
14775                    // name is used internally by
14776                    // {@link gudusoft.gsqlparser.ir.semantic.diff.SemanticIRProjector}
14777                    // (line ~161 — BFS start key keyed by
14778                    // {@code stmtOutputKey(idx, out.getName())}) to walk
14779                    // predicate-body lineage to base columns; uniqueness
14780                    // within the single-column predicate body is
14781                    // sufficient for that walk. {@code _proj_0} is also
14782                    // exposed by the JSON exporter but is not externally
14783                    // referenced by callers — only the inner JOIN
14784                    // canonical edges (target.column omitted; role=JOIN)
14785                    // are visible to consumers.
14786                    name = (stmtName != null ? stmtName : "<predicate_subquery_?>")
14787                            + "_proj_0";
14788                }
14789                boolean aggregate = isAggregateFunction(rc0.getExpr());
14790                // Slice 43 / 44: PG (slice 43) and Snowflake (slice 44)
14791                // hypothetical-set ordered-set aggregates ({@code rank} /
14792                // {@code dense_rank} / {@code percent_rank} /
14793                // {@code cume_dist}) via direct {@code fn.getWithinGroup()}
14794                // attachment do not satisfy
14795                // {@link #isHypotheticalSetWithinGroupCall} (which requires
14796                // a non-null windowDef) and are not in the regular
14797                // {@link #AGGREGATE_FUNCTION_NAMES} whitelist. Inside the
14798                // predicate-body branch they are admitted as aggregates
14799                // when the slice-43 / 44 vendor-gated shape predicate
14800                // fires — contained here (NOT folded into
14801                // {@code isAggregateFunction}) so the carve-out cannot
14802                // accidentally lift the top-level PG / Snowflake case
14803                // (whose dlineage XML is structurally identical to the
14804                // OVER form — see Slice43Test / Slice44Test javadoc).
14805                if (!aggregate
14806                        && rc0.getExpr().getExpressionType() == EExpressionType.function_t
14807                        && isDirectAttachmentHypotheticalSetCall(
14808                                rc0.getExpr().getFunctionCall(), select.dbvendor)) {
14809                    aggregate = true;
14810                }
14811                // Slice 28: FILTER-aware collector excludes column refs inside
14812                // FILTER (WHERE ...) subtrees so OutputColumn.sources matches
14813                // dlineage's lineage-relationship view (FILTER predicate refs
14814                // absent from fdd / fdr).
14815                // Slice 31: also excludes column refs inside Oracle / MSSQL
14816                // {@code fn.windowDef.withinGroup} (the WITHIN GROUP ORDER BY)
14817                // so plain {@code LISTAGG(x.id, ',') WITHIN GROUP (ORDER BY x.region)}
14818                // emits sources=[x.id] only — matching dlineage's omission of the
14819                // WITHIN GROUP ORDER BY ref from {@code fdr clause="on"} sources
14820                // (probe Q1 in {@code /tmp/probe31}). Slice 32 reuses the
14821                // same collector unchanged.
14822                List<ColumnRef> sources = collectColumnRefsExcludingFilterAndWithinGroupClauses(rc0, provider);
14823                if (sources.isEmpty() && !aggregate) {
14824                    // Non-aggregate with no inner column refs: should be
14825                    // covered by the constant short-circuit above. If we
14826                    // reach here, fall through to the normal loop's
14827                    // line-4397 guard for a conservative tuned message.
14828                } else {
14829                    return Collections.singletonList(new OutputColumn(
14830                            name, /*derived=*/ true, aggregate,
14831                            sources, /*windowSpec=*/ null));
14832                }
14833            }
14834            // simple_object_name_t falls through (slice-24 carryover):
14835            // the normal loop produces derived=false /
14836            // sources=[ColumnRef(...)] using effectiveOutputName.
14837        }
14838        // Slice 19 (alias-bound PARTITION BY discriminator): the resolver
14839        // synthesises EXACT_MATCH bindings for PARTITION BY <name> when no
14840        // schema metadata is available (TableNamespace.resolveColumn
14841        // inferred_from_usage fallback), even when <name> is a SELECT-list
14842        // alias on a calculated expression. The discriminator is exposed
14843        // by NameBindingProvider#isCalculatedProjectionAliasFallback and
14844        // consulted in buildWindowPartitionRefs / buildWindowOrderRefs;
14845        // see Slice13Test#partitionByExpressionAliasIsRejectedAsAliasBound
14846        // and the shadowing-with-metadata companion. Slice 19 prefers
14847        // conservative rejection in the no-metadata case; with TSQLEnv
14848        // declaring the shadowed column, ColumnSource#hasDefiniteEvidence
14849        // returns true and the discriminator falls through.
14850        List<OutputColumn> out = new ArrayList<>(rcl.size());
14851        for (int i = 0; i < rcl.size(); i++) {
14852            TResultColumn rc = rcl.getResultColumn(i);
14853            if (rc.getExpr() == null) {
14854                throw new SemanticIRBuildException(Diagnostic.error(DiagnosticCode.RESULT_COLUMN_NULL_EXPRESSION, "result column " + rc + " has null expression", rc));
14855            }
14856            EExpressionType type = rc.getExpr().getExpressionType();
14857            // Slice 58: catalog-backed star expansion for a single base
14858            // table. Star projections were rejected by slices 1-57 with
14859            // "SELECT * / list expansions are deferred"; slice 58 lifts
14860            // the single-base-table case when a catalog is available via
14861            // NameBindingProvider#getRelationColumnNames(TTable). Bare
14862            // `*` and qualified `t.*` both arrive here as
14863            // simple_object_name_t with rc.getColumnNameOnly() == "*"
14864            // (probed; see slice-58 plan); the prior EExpressionType.list_t
14865            // branch is dead defense for stars in practice but stays in
14866            // case a future grammar variant routes them differently.
14867            String colNameOnly = rc.getColumnNameOnly();
14868            if ("*".equals(colNameOnly) || type == EExpressionType.list_t) {
14869                StarExpansionResult exp = tryExpandStar(rc, select, provider,
14870                        isPredicateBody, stmtName);
14871                switch (exp.kind) {
14872                    case EXPANDED:
14873                        out.addAll(exp.columns);
14874                        continue;
14875                    case PREDICATE_BODY_GUARD:
14876                        // Defensive; preflightExistsInnerShape at line ~3880
14877                        // rejects SELECT * in EXISTS earlier with a tuned
14878                        // message. This branch only fires if a future call
14879                        // site enters buildOutputColumns with isPredicateBody
14880                        // and a star still present.
14881                        throw new SemanticIRBuildException(
14882                                Diagnostic.error(DiagnosticCode.STAR_EXPANSION_PREDICATE_BODY,
14883                                "result column " + rc + " is a star expansion (SELECT *) "
14884                                        + "inside a predicate body; not supported yet", rc));
14885                    case SYNTHETIC_BODY_CONTEXT:
14886                        // Slice 59: star expansion is rejected inside a
14887                        // synthetic body (scalar-subquery / set-op-branch /
14888                        // predicate-subquery). Multi-column expansion would
14889                        // violate the body's shape contract; the slice-58
14890                        // path could silently produce this for
14891                        // catalog-equipped builds.
14892                        throw new SemanticIRBuildException(
14893                                Diagnostic.error(DiagnosticCode.STAR_EXPANSION_SYNTHETIC_BODY,
14894                                "result column " + rc + " is a star expansion (SELECT *); "
14895                                        + (exp.detail != null ? exp.detail
14896                                                : "star expansion is not supported inside a synthetic body"), rc));
14897                    case MULTI_RELATION_FROM:
14898                        // Slice 59: defensive catch-all. Normal comma-FROM
14899                        // is rejected earlier by buildRelations:~3042 with
14900                        // a clearer message; reaching this kind indicates
14901                        // a missing top-level TJoin or null FROM table.
14902                        throw new SemanticIRBuildException(
14903                                Diagnostic.error(DiagnosticCode.STAR_EXPANSION_MULTI_RELATION_FROM,
14904                                "result column " + rc + " is a star expansion (SELECT *); "
14905                                        + "FROM source could not be determined "
14906                                        + "(comma-FROM is rejected earlier with a clearer message)", rc));
14907                    case NON_BASE_TABLE_RELATION:
14908                        throw new SemanticIRBuildException(
14909                                Diagnostic.error(DiagnosticCode.STAR_EXPANSION_NON_BASE_TABLE,
14910                                "result column " + rc + " is a star expansion (SELECT *); "
14911                                        + (exp.detail != null ? exp.detail
14912                                                : "FROM source must be a base table"), rc));
14913                    case QUALIFIER_NOT_FOUND:
14914                        throw new SemanticIRBuildException(
14915                                Diagnostic.error(DiagnosticCode.STAR_EXPANSION_QUALIFIER_NOT_FOUND,
14916                                "result column " + rc + " (qualified star "
14917                                        + (exp.qualifier == null ? "?" : exp.qualifier)
14918                                        + ".*) does not match any FROM-clause relation", rc));
14919                    case QUALIFIER_AMBIGUOUS:
14920                        // Slice 59: 2+ FROM relations have the same
14921                        // effective alias. Real SQL never reaches this
14922                        // unless rejectDuplicateAliases:~5621 (case-
14923                        // sensitive) allowed a case-only collision.
14924                        throw new SemanticIRBuildException(
14925                                Diagnostic.error(DiagnosticCode.STAR_EXPANSION_QUALIFIER_AMBIGUOUS,
14926                                "result column " + rc + " (qualified star "
14927                                        + (exp.qualifier == null ? "?" : exp.qualifier)
14928                                        + ".*) is ambiguous: "
14929                                        + (exp.detail != null ? exp.detail
14930                                                : "multiple FROM-clause relations match"), rc));
14931                    case NO_CATALOG_OR_UNKNOWN_TABLE:
14932                        // Slice 58 single-FROM message kept stable; slice 59
14933                        // names the failing relation when known
14934                        // (exp.detail carries the relation's effective alias).
14935                        String relationLabel;
14936                        if (exp.qualifier != null && !exp.qualifier.isEmpty()) {
14937                            relationLabel = exp.qualifier;
14938                        } else if (exp.detail != null && !exp.detail.isEmpty()) {
14939                            relationLabel = exp.detail;
14940                        } else {
14941                            relationLabel = "the FROM relation";
14942                        }
14943                        throw new SemanticIRBuildException(
14944                                Diagnostic.error(DiagnosticCode.STAR_EXPANSION_NO_CATALOG,
14945                                "result column " + rc + " is a star expansion (SELECT *); "
14946                                        + "requires catalog with column declarations for "
14947                                        + relationLabel, rc));
14948                    case EXPLICIT_CTE_COLUMN_LIST_DEFERRED:
14949                        // Slice 103 lifted the explicit-CTE-column-list
14950                        // deferral: the SELECT-side CTE walker now runs
14951                        // the slice-102 rename helper, so the in-scope
14952                        // map publishes the renamed columns and
14953                        // expandSingleRelation returns EXPANDED instead
14954                        // of falling into this arm. The case is kept
14955                        // declared-but-unreached for API stability and
14956                        // exhaustive-switch coverage (slice 71/72/82/86
14957                        // /95/96/97/98/99/100/101/102 precedent). If a
14958                        // future call path re-introduces the kind, the
14959                        // throw still fires with a faithful diagnostic.
14960                        throw new SemanticIRBuildException(
14961                                Diagnostic.error(DiagnosticCode.STAR_EXPANSION_EXPLICIT_CTE_COLUMN_LIST,
14962                                "result column " + rc + " is a star expansion (SELECT *); "
14963                                        + (exp.detail != null ? exp.detail
14964                                                : "star expansion through an explicit CTE column list is deferred to a future slice"), rc));
14965                    case NO_INSCOPE_RELATION_COLUMNS:
14966                        // Slice 60: builder invariant failure — a CTE
14967                        // or FROM-subquery body was not registered in
14968                        // the provider's in-scope-relation-columns map
14969                        // before this consumer ran. User SQL cannot
14970                        // reach this kind under normal build()
                        // execution; reaching it indicates that a
                        // call site is not narrowing the provider
                        // before invoking buildOutputColumns.
14974                        throw new SemanticIRBuildException(
14975                                Diagnostic.error(DiagnosticCode.STAR_EXPANSION_NO_INSCOPE_RELATION_COLUMNS,
14976                                "result column " + rc + " is a star expansion (SELECT *); "
14977                                        + (exp.detail != null ? exp.detail
14978                                                : "in-scope CTE/subquery column map is empty for this relation (builder invariant failure)"), rc));
14979                    // No `default`: switch is intentionally exhaustive
14980                    // over StarExpansionKind. The post-switch throw
14981                    // below is the actual runtime guard if a future
14982                    // enum value is added without updating this
14983                    // switch.
14984                }
14985                throw new SemanticIRBuildException(
14986                        Diagnostic.error(DiagnosticCode.STAR_EXPANSION_UNHANDLED_KIND,
14987                        "result column " + rc + " is a star expansion (SELECT *); "
14988                                + "unhandled StarExpansionKind=" + exp.kind, rc));
14989            }
14990            // Top-level scalar subquery in projection (slice 11). When the
14991            // caller permits it (allowScalarProjectionSubqueries=true), the
14992            // outer caller has already extracted the inner SELECT as its
14993            // own statement via extractScalarSubqueriesAsStatements; here
14994            // we just construct the OutputColumn shell with empty sources
14995            // and let emitLineageForStatement wire the
14996            // STATEMENT_OUTPUT → STATEMENT_OUTPUT edge.
14997            if (type == EExpressionType.subquery_t) {
14998                if (!allowScalarProjectionSubqueries) {
14999                    throw new SemanticIRBuildException(
15000                            Diagnostic.error(DiagnosticCode.NESTED_SCALAR_SUBQUERY_IN_PROJECTION,
15001                            "nested scalar subquery in projection (inside another "
15002                                    + "scalar subquery body or FROM-clause subquery body) "
15003                                    + "is not supported yet", rc));
15004                }
15005                String alias = rc.getColumnAlias();
15006                if (alias == null || alias.isEmpty()) {
15007                    throw new SemanticIRBuildException(
15008                            Diagnostic.error(DiagnosticCode.SCALAR_SUBQUERY_ALIAS_REQUIRED,
15009                            "scalar subquery projection must have an alias", rc));
15010                }
15011                out.add(new OutputColumn(alias, /*derived=*/ true,
15012                        /*aggregate=*/ false,
15013                        Collections.<ColumnRef>emptyList(),
15014                        /*windowSpec=*/ null));
15015                continue;
15016            }
15017            // Slice 13: detect top-level window function before deep scans
15018            // so the embedded-window rejecter can identity-skip the
15019            // legitimate top-level window function call.
15020            boolean topLevelWindow = isTopLevelWindowProjection(rc.getExpr());
15021            // Slice 33: detect Oracle / MSSQL plain WITHIN-GROUP-only
15022            // aggregate at the projection root. When admitted, the root
15023            // function carries fn.windowDef!=null but is the legitimate
15024            // top-level form — the slice-13 invariant rejecters
15025            // (isTopLevelWindowProjection / rejectWindowFunctions /
15026            // rejectEmbeddedWindowFunction) keep their strict wd!=null
15027            // check unchanged; this local boolean is what discriminates
15028            // them. The admission helper combines:
15029            //   - isWithinGroupOnlyWindowDef (no OVER, no KEEP DENSE_RANK)
15030            //   - explicit EDbVendor gate (Oracle / MSSQL only — mirrors
15031            //     the slice-31 predicate-body gate at line ~3860)
15032            //   - function name in AGGREGATE_FUNCTION_NAMES whitelist
15033            // PG / Snowflake / DB2 / SparkSQL produce direct fn.withinGroup
15034            // (windowDef=null) and never reach this admission helper; their
15035            // top-level WG already builds today via the normal aggregate
15036            // path (with pre-existing AGGREGATION_MISMATCH divergence on
15037            // the dlineage projector side that slice 33 deliberately does
15038            // not address — see the slice-30 rationale on
15039            // ORDER_BY_WITHIN_GROUP_AGGREGATE_NAMES for why a name-only
15040            // projector override is unsafe across the dual-form aggregates
15041            // SUM / MIN / MAX / LISTAGG that have OVER (PARTITION BY)
15042            // forms on Oracle).
15043            TFunctionCall slice33RootFn = rc.getExpr().getExpressionType() == EExpressionType.function_t
15044                    ? rc.getExpr().getFunctionCall()
15045                    : null;
15046            boolean slice33TopLevelWG = isAdmittedTopLevelWithinGroupAggregate(
15047                    slice33RootFn, select.dbvendor);
15048            boolean slice35TopLevelDirectWG = isAdmittedTopLevelDirectWithinGroupAggregate(
15049                    slice33RootFn, select.dbvendor);
15050            // Reject scalar subqueries embedded inside larger projection
15051            // expressions (slice 11 + codex round-2 MUST 7). Catches both
15052            // top-level subquery_t hidden under a wrapping expression
15053            // (e.g. UPPER((SELECT ...)) — though the parser sometimes
15054            // strips the wrap) AND predicate subqueries that don't surface
15055            // as subquery_t (EXISTS in projection, IN-projection).
15056            // Slice 9/10 deep-scan pattern.
15057            rejectEmbeddedSubqueryInProjection(rc.getExpr(), rc);
15058            // Slice 13: reject window functions embedded inside larger
15059            // projection expressions (e.g. `ROW_NUMBER() OVER (...) + 1`,
15060            // `UPPER(LAG(...) OVER (...))`). The helper identity-skips
15061            // the legitimate top-level window function call when
15062            // `topLevelWindow=true`.
15063            //
15064            // Slice 33: also identity-skip the top-level WITHIN-GROUP-only
15065            // aggregate root. TFunctionCall.acceptChildren preVisits the
15066            // root function (TFunctionCall.java:1528), so without
15067            // skipTopLevel=true the visitor would catch the slice-33-
15068            // admitted root (fn.windowDef!=null). Embedded WG inside
15069            // UPPER / CASE still rejects because the visitor finds a
15070            // non-root function whose windowDef!=null — the inner
15071            // function is not == identity to the root, so the skip
15072            // doesn't apply.
15073            rejectEmbeddedWindowFunction(rc.getExpr(), rc, topLevelWindow || slice33TopLevelWG);
15074            // Slice 33/35 fast path: WITHIN-GROUP-only aggregate — fall
15075            // through to the normal aggregate path. Oracle / MSSQL use the
15076            // windowDef attachment (slice 33); PostgreSQL direct attachment
15077            // is already on the normal aggregate path but shares the
15078            // unaliased expression-text fallback below (slice 35).
15079            if (slice33TopLevelWG || slice35TopLevelDirectWG) {
15080                // No special branch — fall through to the plain aggregate
15081                // / expression / column path below.
15082            } else if (topLevelWindow) {
15083                if (!allowWindowProjection) {
15084                    throw new SemanticIRBuildException(
15085                            Diagnostic.error(DiagnosticCode.WINDOW_FUNCTION_AS_PROJECTION_NOT_SUPPORTED,
15086                            "result column " + rc + " is a window function; not supported "
15087                                    + "inside this body (e.g. scalar-subquery body)", rc));
15088                }
15089                out.add(buildWindowOutputColumn(rc, select, provider));
15090                continue;
15091            }
15092            // Plain aggregate / expression / column path. The
15093            // rejectWindowFunctions call below is now defensive — the
15094            // top-level-window fast path above intercepts legitimate
15095            // windows, and rejectEmbeddedWindowFunction caught any
15096            // descendant window functions.
15097            //
15098            // Slice 33: skip rejectWindowFunctions for the slice-33-
15099            // admitted shape. The root function has windowDef!=null but
15100            // is the legitimate top-level form; calling
15101            // rejectWindowFunctions here would reject it via the
15102            // strict-wd!=null check (kept unchanged per slice-31
15103            // invariant).
15104            if (!slice33TopLevelWG && !slice35TopLevelDirectWG) {
15105                rejectWindowFunctions(rc.getExpr(), rc);
15106            }
15107            boolean derived = (type != EExpressionType.simple_object_name_t);
15108            boolean aggregate = isAggregateFunction(rc.getExpr());
15109            // Slice 28: FILTER-aware collector excludes column refs inside
15110            // FILTER (WHERE ...) subtrees so OutputColumn.sources matches
15111            // dlineage's lineage-relationship view (FILTER predicate refs
15112            // absent from fdd / fdr).
15113            // Slice 31: also excludes column refs inside Oracle / MSSQL
15114            // {@code fn.windowDef.withinGroup} so plain WITHIN GROUP
15115            // aggregates emit sources from function args only. Defense-
15116            // in-depth here: the slice-31 lift only admits Oracle / MSSQL
15117            // plain WITHIN GROUP at the unaliased predicate-body
15118            // short-circuit (line ~5216) — the strict
15119            // {@link #rejectWindowFunctions} call above keeps top-level
15120            // windowDef-bearing projections rejected outside the
15121            // predicate-body context, so this collector reduces to the
15122            // slice-28 FILTER-only variant in practice today.
15123            List<ColumnRef> sources = collectColumnRefsExcludingFilterAndWithinGroupClauses(rc, provider);
15124            if (sources.isEmpty() && !aggregate) {
15125                boolean canonicalConstant = isConstantExpression(rc.getExpr());
15126                boolean inScalarBody = isScalarSyntheticName(stmtName);
15127                if (canonicalConstant && !inScalarBody) {
15128                    // Slice 61: constant-only projection lift. Predicate
15129                    // bodies still use the earlier slice-23 short-circuit,
15130                    // while scalar-subquery bodies intentionally keep the
15131                    // slice-11/20 invariant that scalar body projections
15132                    // must have a column source.
15133                    String alias = rc.getColumnAlias();
15134                    String name = (alias != null && !alias.isEmpty())
15135                            ? alias
15136                            : rc.getExpr().toString();
15137                    out.add(new OutputColumn(name, /*derived=*/ true,
15138                            /*aggregate=*/ false,
15139                            Collections.<ColumnRef>emptyList(),
15140                            /*windowSpec=*/ null));
15141                    continue;
15142                }
15143                throw new SemanticIRBuildException(
15144                        Diagnostic.error(DiagnosticCode.RESULT_COLUMN_NO_COLUMN_REFS,
15145                        "result column " + rc + " has no column references "
15146                                + "and is not a constant or aggregate expression "
15147                                + "(e.g. UPPER('literal') / CAST / current_date - not supported yet)", rc));
15148            }
15149            // Slice 34: when the slice-33-admitted top-level Oracle / MSSQL
15150            // WITHIN-GROUP-only aggregate has no alias, fall back to the
15151            // parser's expression text. {@code effectiveOutputName} would
15152            // throw "neither alias nor column name" because
15153            // {@code function_t} returns "" from getColumnNameOnly().
15154            // Probe-verified that {@code rc.getExpr().toString()} byte-
15155            // matches dlineage's <select_list> column name attribute on
15156            // Oracle / MSSQL for this shape, so canonical SELECT-edge
15157            // outputName remains in parity with no projector change.
15158            // Gated tightly on slice33TopLevelWG so unrelated unaliased
15159            // shapes (function calls / CASE / expressions outside the
15160            // slice-33 admit set) keep failing loudly via
15161            // effectiveOutputName until each is probed and admitted
15162            // explicitly. See Slice34Test.
15163            String name;
15164            if (slice33TopLevelWG || slice35TopLevelDirectWG) {
15165                String alias = rc.getColumnAlias();
15166                name = (alias != null && !alias.isEmpty())
15167                        ? alias
15168                        : rc.getExpr().toString();
15169            } else {
15170                name = effectiveOutputName(rc);
15171            }
15172            out.add(new OutputColumn(name, derived, aggregate, sources, /*windowSpec=*/ null));
15173        }
15174        return out;
15175    }
15176
15177    /**
15178     * Reject scalar subqueries embedded inside larger projection
15179     * expressions (slice 11). Catches:
15180     *
15181     * <ul>
15182     *   <li>{@code SELECT UPPER((SELECT MAX(salary) AS m FROM employees))
15183     *       AS x FROM ...} — scalar nested inside a function call.</li>
15184     *   <li>{@code SELECT EXISTS (SELECT 1 FROM employees) AS has_emp
15185     *       FROM ...} — EXISTS doesn't surface as
15186     *       {@link EExpressionType#subquery_t} but carries
15187     *       {@code getSubQuery() != null} (slice-9 round-3 lesson).</li>
15188     *   <li>Other in-expression subqueries that
15189     *       {@link #collectColumnRefs} would otherwise descend into.</li>
15190     * </ul>
15191     *
15192     * <p>Only top-level {@code subquery_t} projections are extracted as
15193     * separate statements (handled in
15194     * {@link #extractScalarSubqueriesAsStatements}); embedded subqueries
15195     * remain rejected because the IR doesn't yet model the "expression
15196     * over subquery result" shape.
15197     */
15198    private static void rejectEmbeddedSubqueryInProjection(TExpression expr, TResultColumn rc) {
15199        if (expr == null) return;
15200        final boolean[] found = {false};
15201        expr.acceptChildren(new TParseTreeVisitor() {
15202            @Override
15203            public void preVisit(TExpression e) {
15204                if (found[0]) return;
15205                if (e.getExpressionType() == EExpressionType.subquery_t
15206                        || e.getSubQuery() != null) {
15207                    found[0] = true;
15208                }
15209            }
15210        });
15211        if (!found[0]) {
15212            // Top-level expression itself may carry a subquery (e.g. EXISTS
15213            // at the projection root, where rc.getExpr() is exists_t with
15214            // non-null getSubQuery() but is NOT subquery_t — so the
15215            // top-level subquery_t branch above didn't extract it).
15216            if (expr.getExpressionType() != EExpressionType.subquery_t
15217                    && expr.getSubQuery() != null) {
15218                found[0] = true;
15219            }
15220        }
15221        if (found[0]) {
15222            throw new SemanticIRBuildException(
15223                    Diagnostic.error(DiagnosticCode.RESULT_COLUMN_SCALAR_SUBQUERY_EMBEDDED,
15224                    "result column " + rc + " contains a scalar subquery embedded "
15225                            + "in a larger projection expression; not supported yet "
15226                            + "(only top-level scalar subquery projections are extracted)", rc));
15227        }
15228    }
15229
15230    /**
15231     * Detect whether an expression contains an aggregate function call
15232     * anywhere in its subtree. Slice 6 uses a name whitelist via
15233     * {@link #AGGREGATE_FUNCTION_NAMES}. Walking recursively means
15234     * {@code SUM(salary) + 1} and {@code COUNT(*) + 1} are both classified
15235     * as aggregate (and thus permitted with empty sources via
15236     * {@link #buildOutputColumns}). Wrapped in a helper so slice 7+ can
15237     * swap in deeper detection (e.g. vendor-specific function classification
15238     * on TFunctionCall) without touching call sites.
15239     *
15240     * <p>Note on aggregate literals like {@code COUNT(1)} or {@code SUM(1)}:
15241     * the visitor finds no column refs, so {@code sources=[]}. Slice 6
15242     * permits these as aggregates with no lineage edges; consumers must
15243     * read {@link OutputColumn#isAggregate()} to know the value is
15244     * row-collapsing without column lineage.
15245     */
15246    private static boolean isAggregateFunction(TExpression expr) {
15247        if (expr == null) return false;
15248        // Slice 13: short-circuit for top-level window function. The
15249        // upstream `rejectEmbeddedWindowFunction` has already rejected any
15250        // embedded window functions, but this short-circuit ensures
15251        // `AVG(salary) OVER (...)` is never classified as an aggregate
15252        // even if it somehow slips past the upstream guard.
15253        //
15254        // Slice 31: discriminate WITHIN-GROUP-only windowDef (Oracle /
15255        // MSSQL plain WITHIN GROUP attachment without OVER) so
15256        // `LISTAGG(x.id, ',') WITHIN GROUP (ORDER BY x.region)` stays
15257        // classified as an aggregate. Uses {@link #isWindowDefBearingFunction}
15258        // — only this check and {@link #containsWindowFunction} are
15259        // lifted; every other slice-13 invariant rejecter is unchanged.
15260        if (expr.getExpressionType() == EExpressionType.function_t) {
15261            TFunctionCall rootFn = expr.getFunctionCall();
15262            // Slice 42: hypothetical-set ordered-set aggregate root
15263            // (Oracle / MSSQL {@code RANK(100) WITHIN GROUP (ORDER BY x)})
15264            // — short-circuit aggregate=true. The shape predicate
15265            // {@link #isHypotheticalSetWithinGroupCall} requires WITHIN-
15266            // GROUP-only windowDef AND a name in
15267            // {@link #HYPOTHETICAL_SET_AGGREGATE_NAMES}, so PG direct
15268            // attachment ({@code fn.getWindowDef()==null}) and OVER-
15269            // bearing forms cannot fire it.
15270            if (isHypotheticalSetWithinGroupCall(rootFn)) {
15271                return true;
15272            }
15273            if (isWindowDefBearingFunction(rootFn)) {
15274                return false;
15275            }
15276        }
15277        final boolean[] found = {false};
15278        expr.acceptChildren(new TParseTreeVisitor() {
15279            @Override
15280            public void preVisit(TFunctionCall fn) {
15281                if (found[0]) return;
15282                // Slice 13 codex round-2 SHOULD 3: skip windowed function
15283                // calls inside the visitor too, defensively. Upstream
15284                // rejection should already have fired, but this removes
15285                // overlap risk for `sum/count/avg`.
15286                //
15287                // Slice 31: same WITHIN-GROUP-only carve-out as the root
15288                // short-circuit above so an Oracle / MSSQL plain WITHIN
15289                // GROUP aggregate nested inside CASE/UPPER (slice-27
15290                // admit) is still picked up as aggregate.
15291                //
15292                // Slice 42: hypothetical-set ordered-set aggregate carve-
15293                // out — descendants matching the shape predicate count
15294                // as aggregate (defense-in-depth; the slice-13 embedded-
15295                // window rejecter already fires on inner WG-bearing
15296                // calls, so this branch is mostly unreachable today).
15297                if (isHypotheticalSetWithinGroupCall(fn)) {
15298                    found[0] = true;
15299                    return;
15300                }
15301                if (isWindowDefBearingFunction(fn)) return;
15302                if (fn.getFunctionName() == null) return;
15303                String name = fn.getFunctionName().toString();
15304                if (name == null || name.isEmpty()) return;
15305                if (AGGREGATE_FUNCTION_NAMES.contains(name.toLowerCase(Locale.ROOT))) {
15306                    found[0] = true;
15307                }
15308            }
15309        });
15310        // The root expression itself is not visited by acceptChildren — only
15311        // its children. If the root is the function call (the common case
15312        // for `SUM(salary)` with no enclosing arithmetic), check it too.
15313        if (!found[0] && expr.getExpressionType() == EExpressionType.function_t) {
15314            TFunctionCall fn = expr.getFunctionCall();
15315            if (fn != null && fn.getFunctionName() != null) {
15316                String name = fn.getFunctionName().toString();
15317                if (name != null && !name.isEmpty()
15318                        && AGGREGATE_FUNCTION_NAMES.contains(name.toLowerCase(Locale.ROOT))) {
15319                    found[0] = true;
15320                }
15321            }
15322        }
15323        return found[0];
15324    }
15325
15326    /**
15327     * Reject window-function projections like {@code AVG(salary) OVER (...)}.
15328     * In the GSP AST these still parse as {@code function_t} with a
15329     * non-null {@code TFunctionCall.getWindowDef()}, but their semantics
15330     * are row-preserving (analytic), not row-collapsing (aggregate). Slice
15331     * 6 owns plain GROUP BY aggregation only; window functions deserve
15332     * their own slice.
15333     */
15334    private static void rejectWindowFunctions(TExpression expr, TResultColumn rc) {
15335        if (expr == null) return;
15336        final boolean[] found = {false};
15337        expr.acceptChildren(new TParseTreeVisitor() {
15338            @Override
15339            public void preVisit(TFunctionCall fn) {
15340                if (found[0]) return;
15341                if (fn.getWindowDef() != null) found[0] = true;
15342            }
15343        });
15344        if (!found[0] && expr.getExpressionType() == EExpressionType.function_t) {
15345            TFunctionCall fn = expr.getFunctionCall();
15346            if (fn != null && fn.getWindowDef() != null) found[0] = true;
15347        }
15348        if (found[0]) {
15349            throw new SemanticIRBuildException(
15350                    Diagnostic.error(DiagnosticCode.WINDOW_FUNCTION_USED_NOT_SUPPORTED,
15351                    "result column " + rc + " uses a window function (OVER (...)); not supported yet", rc));
15352        }
15353    }
15354
15355    /**
15356     * Slice 13: detect whether the projection root is a top-level
15357     * window-function call. Returns {@code true} iff
15358     * {@code expr.getExpressionType() == function_t} AND the function
15359     * call carries a non-null {@code TWindowDef}. The result drives
15360     * three things in {@link #buildOutputColumns}:
15361     *
15362     * <ul>
15363     *   <li>The {@code skipTopLevel} arg to
15364     *       {@link #rejectEmbeddedWindowFunction} so the legitimate
15365     *       top-level window call is identity-skipped during embedded
15366     *       detection.</li>
15367     *   <li>The fast-path dispatch into
15368     *       {@link #buildWindowOutputColumn} when window projections
15369     *       are allowed.</li>
15370     *   <li>The scalar-body / future-context rejection when window
15371     *       projections are forbidden in the surrounding context
15372     *       (slice-13 {@code allowWindowProjection=false}).</li>
15373     * </ul>
15374     */
15375    private static boolean isTopLevelWindowProjection(TExpression expr) {
15376        if (expr == null) return false;
15377        if (expr.getExpressionType() != EExpressionType.function_t) return false;
15378        TFunctionCall fn = expr.getFunctionCall();
15379        return fn != null && fn.getWindowDef() != null;
15380    }
15381
15382    /**
15383     * Slice 13: reject window functions embedded inside a larger
15384     * projection expression (mirrors slice 11's
15385     * {@link #rejectEmbeddedSubqueryInProjection}). The {@code skipTopLevel}
15386     * flag is set when the caller has identified
15387     * {@code expr} as a legitimate top-level window-function projection
15388     * — without identity-skipping that exact {@code TFunctionCall} the
15389     * visitor would reject every valid top-level window.
15390     *
15391     * <p>Visitor-only (no post-visitor fallback): unlike
15392     * {@code subquery_t} which can be wrapped in expression types that
15393     * do not surface as {@code subquery_t}, a window function is always
15394     * reachable through {@code TExpression.acceptChildren} →
15395     * {@code TFunctionCall.preVisit}. Codex round-3 MUST 1.
15396     */
15397    private static void rejectEmbeddedWindowFunction(TExpression expr,
15398                                                     TResultColumn rc,
15399                                                     boolean skipTopLevel) {
15400        if (expr == null) return;
15401        final TFunctionCall topLevelFn =
15402                skipTopLevel
15403                        && expr.getExpressionType() == EExpressionType.function_t
15404                        ? expr.getFunctionCall()
15405                        : null;
15406        final boolean[] found = {false};
15407        expr.acceptChildren(new TParseTreeVisitor() {
15408            @Override
15409            public void preVisit(TFunctionCall fn) {
15410                if (found[0]) return;
15411                if (fn == topLevelFn) return;            // identity-skip
15412                if (fn.getWindowDef() != null) found[0] = true;
15413            }
15414        });
15415        if (found[0]) {
15416            throw new SemanticIRBuildException(
15417                    Diagnostic.error(DiagnosticCode.WINDOW_FUNCTION_EMBEDDED_NOT_SUPPORTED,
15418                    "result column " + rc + " contains a window function embedded "
15419                            + "in a larger projection expression; not supported yet "
15420                            + "(only top-level window-function projections are supported)", rc));
15421        }
15422    }
15423
15424    /**
15425     * Lower-cased function names accepted as window functions in slice 13.
15426     * Includes every name in {@link #AGGREGATE_FUNCTION_NAMES} (aggregates
15427     * can be windowed: {@code SUM(...) OVER (...)}, {@code AVG(...) OVER (...)},
15428     * etc.) plus the analytic-only names. New analytic functions must be
15429     * added here explicitly to avoid silent acceptance of an unfamiliar
15430     * window function whose semantics the slice does not yet model.
15431     *
15432     * <p>Slice 30 exception: {@code mode} is added to
15433     * {@link #AGGREGATE_FUNCTION_NAMES} for the WITHIN GROUP path but
15434     * REMOVED from this allowlist via {@code s.remove("mode")} below —
15435     * {@code mode()} has no documented window form in any GSP-supported
15436     * vendor and the explicit removal keeps {@code mode() OVER (...)}
15437     * (which the PostgreSQL parser accepts) rejected by
15438     * {@code buildWindowOutputColumn}.
15439     */
15440    private static final Set<String> WINDOW_FUNCTION_NAMES;
15441    static {
15442        Set<String> s = new HashSet<>();
15443        // Aggregate names that can be windowed.
15444        s.addAll(AGGREGATE_FUNCTION_NAMES);
15445        // Slice 30: mode is an ordered-set-only aggregate; remove from the
15446        // window allowlist (it was added to AGGREGATE_FUNCTION_NAMES for the
15447        // WITHIN GROUP path but never appears as a real window function in
15448        // any GSP-supported vendor — see Slice30Test.pgModeOverStillRejected
15449        // AtOuterProjection for the lock-in).
15450        s.remove("mode");
15451        // Analytic-only window functions.
15452        s.add("row_number");
15453        s.add("rank");
15454        s.add("dense_rank");
15455        s.add("lag");
15456        s.add("lead");
15457        s.add("ntile");
15458        s.add("first_value");
15459        s.add("last_value");
15460        s.add("percent_rank");
15461        s.add("cume_dist");
15462        s.add("nth_value");
15463        WINDOW_FUNCTION_NAMES = Collections.unmodifiableSet(s);
15464    }
15465
15466    /**
15467     * Slice 13: build an {@link OutputColumn} for a top-level
15468     * window-function projection. Caller must have already verified
15469     * {@link #isTopLevelWindowProjection(TExpression)}, run the
15470     * {@link #rejectEmbeddedSubqueryInProjection} and
15471     * {@link #rejectEmbeddedWindowFunction} guards, and confirmed the
15472     * surrounding body permits window projections (i.e., the
15473     * {@code !allowWindowProjection} fast-path in
15474     * {@link #buildOutputColumns} did not fire).
15475     *
15476     * <p>The constructed {@link OutputColumn} carries:
15477     * <ul>
15478     *   <li>{@code derived = true} (window functions are computed)</li>
15479     *   <li>{@code aggregate = false} (window functions are
15480     *       row-preserving — see slice-13 §14)</li>
15481     *   <li>{@code sources} = column refs from the function args only
15482     *       (PARTITION BY / OVER ORDER BY refs are excluded so that
15483     *       canonical SELECT lineage matches dlineage's
15484     *       function-arg-only SELECT BFS)</li>
15485     *   <li>{@code windowSpec = WindowSpec(partitionRefs, orderRefs, frame)}
15486     *       (slice 22 — frame may be null when the SQL has no
15487     *       {@code ROWS}/{@code RANGE}/{@code GROUPS BETWEEN ...} clause)</li>
15488     * </ul>
15489     */
15490    private static OutputColumn buildWindowOutputColumn(TResultColumn rc,
15491                                                        TSelectSqlStatement enclosingSelect,
15492                                                        NameBindingProvider provider) {
15493        TFunctionCall fn = rc.getExpr().getFunctionCall();
15494        TWindowDef wd = fn.getWindowDef();
15495
15496        // 1. Function-name allowlist (codex round-1 MUST 3).
15497        String fnName = fn.getFunctionName() == null ? null : fn.getFunctionName().toString();
15498        if (fnName == null || !WINDOW_FUNCTION_NAMES.contains(fnName.toLowerCase(Locale.ROOT))) {
15499            throw new SemanticIRBuildException(
15500                    Diagnostic.error(DiagnosticCode.WINDOW_FUNCTION_UNSUPPORTED,
15501                    "result column " + rc + " uses unsupported window function '"
15502                            + fnName + "'; supported names are " + WINDOW_FUNCTION_NAMES, rc));
15503        }
15504
15505        // 2. Reject vendor-specific function-level surfaces (codex round-1 MUST 4).
15506        if (fn.getFilterClause() != null) {
15507            throw new SemanticIRBuildException(
15508                    Diagnostic.error(DiagnosticCode.WINDOW_FILTER_NOT_SUPPORTED,
15509                    "result column " + rc + " uses FILTER (WHERE ...) on a "
15510                            + "window function; not supported yet", rc));
15511        }
15512        if (fn.getWithinGroup() != null) {
15513            throw new SemanticIRBuildException(
15514                    Diagnostic.error(DiagnosticCode.WINDOW_WITHIN_GROUP_NOT_SUPPORTED,
15515                    "result column " + rc + " uses WITHIN GROUP on a "
15516                            + "window function; not supported yet", rc));
15517        }
15518        if (fn.getOrderByList() != null && fn.getOrderByList().size() > 0) {
15519            throw new SemanticIRBuildException(
15520                    Diagnostic.error(DiagnosticCode.WINDOW_FUNCTION_LEVEL_ORDER_BY_NOT_SUPPORTED,
15521                    "result column " + rc + " uses function-level ORDER BY "
15522                            + "(LISTAGG-style); not supported yet", rc));
15523        }
15524        if (fn.getSortClause() != null) {
15525            throw new SemanticIRBuildException(
15526                    Diagnostic.error(DiagnosticCode.WINDOW_FUNCTION_LEVEL_SORT_NOT_SUPPORTED,
15527                    "result column " + rc + " uses function-level SORT clause; "
15528                            + "not supported yet", rc));
15529        }
15530
15531        // 3. Reject vendor-specific window-def surfaces (codex round-1 MUSTs 5, 7).
15532        if (wd.getName() != null) {
15533            throw new SemanticIRBuildException(
15534                    Diagnostic.error(DiagnosticCode.WINDOW_NAMED_WINDOW_DECLARATION_NOT_SUPPORTED,
15535                    "result column " + rc + " declares a named window "
15536                            + "(WINDOW name AS); not supported yet", rc));
15537        }
15538        if (wd.getReferenceName() != null) {
15539            throw new SemanticIRBuildException(
15540                    Diagnostic.error(DiagnosticCode.WINDOW_NAMED_WINDOW_REFERENCE_NOT_SUPPORTED,
15541                    "result column " + rc + " references a named window via "
15542                            + "OVER name; not supported yet", rc));
15543        }
15544        if (wd.getWithinGroup() != null) {
15545            throw new SemanticIRBuildException(
15546                    Diagnostic.error(DiagnosticCode.WINDOW_WITHIN_GROUP_INSIDE_PROJECTION_NOT_SUPPORTED,
15547                    "result column " + rc + " uses WITHIN GROUP inside the "
15548                            + "OVER clause; not supported yet", rc));
15549        }
15550        if (wd.getKeepDenseRankClause() != null) {
15551            throw new SemanticIRBuildException(
15552                    Diagnostic.error(DiagnosticCode.WINDOW_KEEP_DENSE_RANK_NOT_SUPPORTED,
15553                    "result column " + rc + " uses KEEP DENSE_RANK FIRST/LAST; "
15554                            + "not supported yet", rc));
15555        }
15556        if (wd.getDistributeBy() != null) {
15557            throw new SemanticIRBuildException(
15558                    Diagnostic.error(DiagnosticCode.WINDOW_DISTRIBUTE_BY_NOT_SUPPORTED,
15559                    "result column " + rc + " uses Hive DISTRIBUTE BY in window; "
15560                            + "not supported yet", rc));
15561        }
15562        if (wd.getClusterBy() != null) {
15563            throw new SemanticIRBuildException(
15564                    Diagnostic.error(DiagnosticCode.WINDOW_CLUSTER_BY_NOT_SUPPORTED,
15565                    "result column " + rc + " uses Hive CLUSTER BY in window; "
15566                            + "not supported yet", rc));
15567        }
15568        if (wd.getSortBy() != null) {
15569            throw new SemanticIRBuildException(
15570                    Diagnostic.error(DiagnosticCode.WINDOW_SORT_BY_NOT_SUPPORTED,
15571                    "result column " + rc + " uses Hive SORT BY in window; "
15572                            + "not supported yet", rc));
15573        }
15574        // Slice 22: frame clauses are now built into WindowSpec.frame; the
15575        // slice-13 wholesale rejection is gone. Frame build happens AFTER
15576        // empty-OVER reject below so a frame-only OVER (...) fails on
15577        // empty-OVER first (the more user-tuned error message).
15578
15579        // 4. Reject empty OVER () (slice-13 boundary; dlineage parity —
15580        // empty OVER () is byte-identical to a plain aggregate in the XML).
15581        TPartitionClause pc = wd.getPartitionClause();
15582        TOrderBy ob = wd.getOrderBy();
15583        boolean hasPartitionBy = pc != null
15584                && pc.getExpressionList() != null
15585                && pc.getExpressionList().size() > 0;
15586        boolean hasOverOrderBy = ob != null
15587                && ob.getItems() != null
15588                && ob.getItems().size() > 0;
15589        if (!hasPartitionBy && !hasOverOrderBy) {
15590            throw new SemanticIRBuildException(
15591                    Diagnostic.error(DiagnosticCode.WINDOW_EMPTY_OVER_NOT_SUPPORTED,
15592                    "result column " + rc + " uses empty OVER (); not supported yet "
15593                            + "(dlineage XML cannot discriminate from a plain aggregate)", rc));
15594        }
15595
15596        // 5. Reject Hive PARTITION BY ... SORT (...).
15597        if (pc != null && pc.getSortedColumns() != null && pc.getSortedColumns().size() > 0) {
15598            throw new SemanticIRBuildException(
15599                    Diagnostic.error(DiagnosticCode.WINDOW_PARTITION_BY_SORT_NOT_SUPPORTED,
15600                    "result column " + rc + " uses Hive PARTITION BY ... SORT (...); "
15601                            + "not supported yet", rc));
15602        }
15603
15604        // 6. Build PARTITION BY refs.
15605        List<ColumnRef> partitionRefs = hasPartitionBy
15606                ? buildWindowPartitionRefs(pc, rc, enclosingSelect, provider)
15607                : new ArrayList<ColumnRef>();
15608
15609        // 7. Build OVER ORDER BY refs.
15610        List<ColumnRef> orderRefs = hasOverOrderBy
15611                ? buildWindowOrderRefs(ob, rc, enclosingSelect, provider)
15612                : new ArrayList<ColumnRef>();
15613
15614        // 8. Build frame (slice 22). Null when the SQL has no ROWS/RANGE/
15615        // GROUPS clause inside OVER (...).
15616        WindowFrame frame = wd.getWindowFrame() == null
15617                ? null
15618                : buildWindowFrame(wd.getWindowFrame(), rc);
15619
15620        // 9. Build sources from args only — PARTITION BY / OVER ORDER BY
15621        // refs must NOT leak into OutputColumn.sources because canonical
15622        // SELECT lineage on the dlineage side only walks fdd edges
15623        // (function args), not fdr edges (PARTITION BY / OVER ORDER BY).
15624        List<ColumnRef> sources = (fn.getArgs() == null || fn.getArgs().size() == 0)
15625                ? new ArrayList<ColumnRef>()
15626                : collectColumnRefs(fn.getArgs(), provider);
15627
15628        // 10. Construct OutputColumn. aggregate=false ALWAYS for window
15629        // functions (row-preserving). The OutputColumn ctor enforces
15630        // the windowSpec!=null AND aggregate=false invariant.
15631        String name = effectiveOutputName(rc);
15632        return new OutputColumn(name, /*derived=*/ true, /*aggregate=*/ false,
15633                sources, new WindowSpec(partitionRefs, orderRefs, frame));
15634    }
15635
15636    /**
15637     * Slice 22: build a {@link WindowFrame} from a parser
15638     * {@link TWindowFrame}. Frame information is presentation-only
15639     * (dlineage XML harvests no frame data — see
15640     * {@code DataFlowAnalyzer.java:20558-20575}); this helper captures
15641     * the surface shape into the IR for governance consumers without
15642     * touching the canonical lineage model.
15643     *
15644     * <p>Direct field access via {@link TWindowFrame#getStartBoundary()} /
15645     * {@link TWindowFrame#getEndBoundary()}; visitors are NOT used because
15646     * {@code TWindowFrame.acceptChildren()} doesn't recurse into the
15647     * boundaries (codex round-1 SHOULD 3).
15648     *
     * <p>Order of guards (codex round-2 SHOULD 2): this method first runs
     * the frame-level null guards described below, then delegates each
     * boundary to {@link #buildFrameBound}, which checks in this order:
     * EXCLUDE first so the error message is tuned to the actual surface;
     * then null-guard the boundary type (defensive — current parsers
     * always pass it); then map the {@code EBoundaryType} via an
     * exhaustive switch (slice-14 process lesson #17 — no silent
     * catch-all; the {@code default} branch throws); then check the
     * {@code boundaryNumber} expression type and reject non-constant
     * offsets (codex round-1 SHOULD 1 — PG {@code simple_object_name_t}
     * and ANSI {@code parenthesis_t} are reachable).
15657     *
15658     * <p>Null guards on the frame's {@link ELimitRowType} and
15659     * {@code startBoundary} fields are defensive / forward-compat: every
15660     * vendor grammar surveyed (codex round-4 NOTE 1) passes these
15661     * arguments together when constructing a {@code TWindowFrame}, so the
15662     * guards are unexercised by current parsers but protect against
15663     * future parser drift.
15664     */
15665    private static WindowFrame buildWindowFrame(TWindowFrame wf, TResultColumn rc) {
15666        // Defensive null guards (codex round-2 MUST 2; codex round-4
15667        // SHOULD 1 — labelled DEFENSIVE / FORWARD-COMPAT).
15668        if (wf.getLimitRowType() == null) {
15669            throw new SemanticIRBuildException(
15670                    Diagnostic.error(DiagnosticCode.WINDOW_FRAME_NULL_LIMIT_ROW_TYPE,
15671                    "result column " + rc + " has a frame with null limitRowType "
15672                            + "(forward-compat / unexpected parser shape); not supported", rc));
15673        }
15674        if (wf.getStartBoundary() == null) {
15675            throw new SemanticIRBuildException(
15676                    Diagnostic.error(DiagnosticCode.WINDOW_FRAME_NULL_START_BOUNDARY,
15677                    "result column " + rc + " has a frame with null start boundary "
15678                            + "(forward-compat / unexpected parser shape); not supported", rc));
15679        }
15680        WindowFrame.Unit unit = mapFrameUnit(wf.getLimitRowType());
15681        FrameBound start = buildFrameBound(wf.getStartBoundary(), rc, /*end=*/ false);
15682        FrameBound end = wf.getEndBoundary() == null
15683                ? null
15684                : buildFrameBound(wf.getEndBoundary(), rc, /*end=*/ true);
15685        return new WindowFrame(unit, start, end);
15686    }
15687
15688    /**
15689     * Slice 22: map the parser's {@link ELimitRowType} to the IR's
     * {@link WindowFrame.Unit}. Exhaustive switch (slice-14 process
     * lesson #17 — no silent catch-all mapping): the {@code default}
     * branch throws, so a future enum addition fails closed.
15692     */
15693    private static WindowFrame.Unit mapFrameUnit(ELimitRowType type) {
15694        switch (type) {
15695            case Rows:
15696                return WindowFrame.Unit.ROWS;
15697            case Range:
15698                return WindowFrame.Unit.RANGE;
15699            case Groups:
15700                return WindowFrame.Unit.GROUPS;
15701            default:
15702                throw new SemanticIRBuildException(
15703                        Diagnostic.error(DiagnosticCode.WINDOW_FRAME_UNSUPPORTED_LIMIT_ROW_TYPE,
15704                        "unsupported window frame limitRowType: " + type, null));
15705        }
15706    }
15707
15708    /**
15709     * Slice 22: build a {@link FrameBound} from a parser
15710     * {@link TWindowFrameBoundary}. The {@code end} parameter is for
15711     * error messages only (start vs end disambiguation).
15712     *
15713     * <p>Per-bound check order: EXCLUDE → boundaryType-null → kind switch
15714     * → boundaryNumber shape (codex round-2 SHOULD 2 + slice-22 invariant).
15715     */
15716    private static FrameBound buildFrameBound(TWindowFrameBoundary boundary,
15717                                              TResultColumn rc,
15718                                              boolean end) {
15719        String which = end ? "end" : "start";
15720
15721        // (a) EXCLUDE first (codex round-1 MUST 2 + Netezza probe).
15722        // Netezza populates getExclusionClause() on the END boundary for
15723        // EXCLUDE CURRENT ROW / GROUP / TIES / NO OTHERS; rejecting here
15724        // surfaces the unsupported clause with a tuned message rather
15725        // than letting the offset-shape check fire on an unrelated
15726        // surface.
15727        if (boundary.getExclusionClause() != null) {
15728            throw new SemanticIRBuildException(
15729                    Diagnostic.error(DiagnosticCode.WINDOW_FRAME_EXCLUDE_NOT_SUPPORTED,
15730                    "result column " + rc + " has a frame " + which
15731                            + " boundary with EXCLUDE clause "
15732                            + "(EXCLUDE CURRENT ROW / GROUP / TIES / NO OTHERS); "
15733                            + "not supported yet", rc));
15734        }
15735
15736        // (b) Null-guard the boundary type (defensive).
15737        if (boundary.getBoundaryType() == null) {
15738            throw new SemanticIRBuildException(
15739                    Diagnostic.error(DiagnosticCode.WINDOW_FRAME_NULL_BOUNDARY_TYPE,
15740                    "result column " + rc + " has a frame " + which
15741                            + " boundary with null boundaryType "
15742                            + "(forward-compat / unexpected parser shape); not supported", rc));
15743        }
15744
15745        // (c) Map the kind via exhaustive switch.
15746        FrameBound.Kind kind = mapBoundaryKind(boundary.getBoundaryType());
15747
15748        // (d) Capture the optional offset literal. Reject non-constant
15749        // offsets (codex round-1 SHOULD 1 + slice-22 PG/ANSI probe — PG
15750        // accepts simple_object_name_t (column ROWS BETWEEN x PRECEDING ...),
15751        // ANSI accepts parenthesis_t ((x+1))).
15752        String offsetLiteral = null;
15753        TExpression offsetExpr = boundary.getBoundaryNumber();
15754        if (offsetExpr != null) {
15755            // Slice-22 codex impl-review SHOULD 1: when the kind forbids
15756            // an offset (UNBOUNDED_*/CURRENT_ROW), reject with
15757            // SemanticIRBuildException so the failure stays inside the
15758            // builder's error contract — without this guard, a parser
15759            // surfacing a stray boundary number on CURRENT_ROW would
15760            // escape as IllegalArgumentException from
15761            // FrameBound's ctor.
15762            boolean offsetAllowed = (kind == FrameBound.Kind.PRECEDING
15763                    || kind == FrameBound.Kind.FOLLOWING);
15764            if (!offsetAllowed) {
15765                throw new SemanticIRBuildException(
15766                        Diagnostic.error(DiagnosticCode.WINDOW_FRAME_UNEXPECTED_OFFSET,
15767                        "result column " + rc + " has a frame " + which
15768                                + " boundary of kind " + kind
15769                                + " carrying an unexpected offset '"
15770                                + offsetExpr + "' (forward-compat / "
15771                                + "unexpected parser shape); not supported", rc));
15772            }
15773            EExpressionType offsetType = offsetExpr.getExpressionType();
15774            if (offsetType != EExpressionType.simple_constant_t) {
15775                throw new SemanticIRBuildException(
15776                        Diagnostic.error(DiagnosticCode.WINDOW_FRAME_OFFSET_NON_CONSTANT,
15777                        "result column " + rc + " has a frame " + which
15778                                + " offset that is not a simple constant "
15779                                + "(got " + offsetType + " '" + offsetExpr + "'); "
15780                                + "not supported yet", rc));
15781            }
15782            offsetLiteral = offsetExpr.toString();
15783        }
15784        return new FrameBound(kind, offsetLiteral);
15785    }
15786
15787    /**
15788     * Slice 22: map the parser's {@link EBoundaryType} to the IR's
     * {@link FrameBound.Kind}. Exhaustive switch (slice-14 process
     * lesson #17); the {@code default} branch throws, so an unmapped
     * boundary type fails closed.
15791     */
15792    private static FrameBound.Kind mapBoundaryKind(EBoundaryType type) {
15793        switch (type) {
15794            case ebtUnboundedPreceding:
15795                return FrameBound.Kind.UNBOUNDED_PRECEDING;
15796            case ebtUnboundedFollowing:
15797                return FrameBound.Kind.UNBOUNDED_FOLLOWING;
15798            case ebtCurrentRow:
15799                return FrameBound.Kind.CURRENT_ROW;
15800            case ebtPreceding:
15801                return FrameBound.Kind.PRECEDING;
15802            case ebtFollowing:
15803                return FrameBound.Kind.FOLLOWING;
15804            default:
15805                throw new SemanticIRBuildException(
15806                        Diagnostic.error(DiagnosticCode.WINDOW_FRAME_UNSUPPORTED_BOUNDARY_TYPE,
15807                        "unsupported frame boundary type: " + type, null));
15808        }
15809    }
15810
15811    /**
15812     * Slice 13: build the PARTITION BY ref list. Every item must be a
15813     * physical column reference ({@code simple_object_name_t} resolving
15814     * via the provider to {@code EXACT_MATCH}). Other shapes are
15815     * rejected with a tuned message — slice-9 / slice-13
15816     * rejection-over-silent-loss.
15817     */
15818    private static List<ColumnRef> buildWindowPartitionRefs(TPartitionClause pc,
15819                                                             TResultColumn rc,
15820                                                             TSelectSqlStatement enclosingSelect,
15821                                                             NameBindingProvider provider) {
15822        LinkedHashSet<ColumnRef> refs = new LinkedHashSet<>();
15823        TExpressionList list = pc.getExpressionList();
15824        for (int i = 0; i < list.size(); i++) {
15825            TExpression item = list.getExpression(i);
15826            EExpressionType t = item.getExpressionType();
15827            if (t == EExpressionType.simple_constant_t) {
15828                throw new SemanticIRBuildException(
15829                        Diagnostic.error(DiagnosticCode.WINDOW_PARTITION_BY_LITERAL,
15830                        "result column " + rc + " has PARTITION BY literal '"
15831                                + item + "'; not supported yet", rc));
15832            }
15833            if (t == EExpressionType.subquery_t || item.getSubQuery() != null) {
15834                throw new SemanticIRBuildException(
15835                        Diagnostic.error(DiagnosticCode.WINDOW_PARTITION_BY_SUBQUERY,
15836                        "result column " + rc + " has PARTITION BY containing "
15837                                + "a subquery; not supported yet", rc));
15838            }
15839            if (t == EExpressionType.function_t) {
15840                throw new SemanticIRBuildException(
15841                        Diagnostic.error(DiagnosticCode.WINDOW_PARTITION_BY_AGGREGATE,
15842                        "result column " + rc + " has PARTITION BY containing "
15843                                + "a function call '" + item + "'; not supported yet", rc));
15844            }
15845            if (t != EExpressionType.simple_object_name_t) {
15846                throw new SemanticIRBuildException(
15847                        Diagnostic.error(DiagnosticCode.WINDOW_PARTITION_BY_UNKNOWN_REFERENCE,
15848                        "result column " + rc + " has PARTITION BY using an "
15849                                + "unsupported expression shape (" + t + "): " + item, rc));
15850            }
15851            // Defensive: reject if the parser/resolver has retyped this
15852            // item as a projection alias. Current Oracle parsers leave
15853            // PARTITION BY <alias> as dbType=column even for projection
15854            // aliases; this guard fires for vendors that may behave
15855            // differently. The slice-19 discriminator below catches the
15856            // Oracle case where dbType stays "column" but the binding
15857            // came from the schema-less inferred-from-usage fallback.
15858            TObjectName on = item.getObjectOperand();
15859            if (on != null && on.getDbObjectType() == EDbObjectType.column_alias) {
15860                throw new SemanticIRBuildException(
15861                        Diagnostic.error(DiagnosticCode.WINDOW_PARTITION_BY_PROJECTION_ALIAS,
15862                        "result column " + rc + " has PARTITION BY referencing "
15863                                + "a projection alias '" + item + "'; not supported yet", rc));
15864            }
15865            // Slice 19: alias-bound discriminator. Reject when the
15866            // resolver's binding lacks definite FROM-scope evidence and
15867            // the name matches a calculated SELECT-list alias of the
15868            // enclosing SELECT. Without schema metadata the resolver
15869            // cannot tell alias from real column; rejection-over-silent-
15870            // guess matches the slice-9/-10/-13 invariant.
15871            if (on != null && provider.isCalculatedProjectionAliasFallback(on, enclosingSelect)) {
15872                throw new SemanticIRBuildException(
15873                        Diagnostic.error(DiagnosticCode.WINDOW_PARTITION_BY_CALCULATED_ALIAS,
15874                        "result column " + rc + " has PARTITION BY referencing a "
15875                                + "SELECT-list alias on a calculated expression ('" + item
15876                                + "'); not supported yet — requires schema metadata to "
15877                                + "discriminate alias from base column", rc));
15878            }
15879            // Resolve the column ref through the provider. EXACT_MATCH is
15880            // required (slice-1 fail-fast invariant); collectColumnRefs
15881            // does the heavy lifting and rejects anything else.
15882            List<ColumnRef> built = collectColumnRefs(item, provider);
15883            if (built.isEmpty()) {
15884                throw new SemanticIRBuildException(
15885                        Diagnostic.error(DiagnosticCode.WINDOW_PARTITION_BY_ITEM_UNUSABLE,
15886                        "result column " + rc + " has PARTITION BY item '"
15887                                + item + "' with no resolvable column refs", rc));
15888            }
15889            refs.addAll(built);
15890        }
15891        return new ArrayList<>(refs);
15892    }
15893
15894    /**
15895     * Slice 13: build the OVER ORDER BY ref list. Every sort key must
15896     * be a physical column reference (mirrors slice-9 outer ORDER BY
15897     * rejection set). Ordinals, projection aliases, expressions,
15898     * subqueries, window functions, and SIBLINGS / RESET WHEN are
15899     * rejected with tuned messages.
15900     */
15901    private static List<ColumnRef> buildWindowOrderRefs(TOrderBy ob,
15902                                                         TResultColumn rc,
15903                                                         TSelectSqlStatement enclosingSelect,
15904                                                         NameBindingProvider provider) {
15905        // Slice-13 codex impl-review MUST 2: defense in depth, mirror outer
15906        // ORDER BY's slice-9 SIBLINGS / RESET WHEN guards.
15907        if (ob.isSiblings()) {
15908            throw new SemanticIRBuildException(
15909                    Diagnostic.error(DiagnosticCode.WINDOW_OVER_ORDER_BY_SIBLINGS_NOT_SUPPORTED,
15910                    "result column " + rc + " has OVER ORDER BY SIBLINGS; not supported yet "
15911                            + "(Oracle hierarchical-query syntax in window OVER clause)", rc));
15912        }
15913        if (ob.getResetWhenCondition() != null) {
15914            throw new SemanticIRBuildException(
15915                    Diagnostic.error(DiagnosticCode.WINDOW_OVER_ORDER_BY_RESET_WHEN_NOT_SUPPORTED,
15916                    "result column " + rc + " has OVER ORDER BY ... RESET WHEN; not supported yet "
15917                            + "(Teradata window-style restart)", rc));
15918        }
15919        LinkedHashSet<ColumnRef> refs = new LinkedHashSet<>();
15920        TOrderByItemList items = ob.getItems();
15921        for (int i = 0; i < items.size(); i++) {
15922            TOrderByItem item = items.getOrderByItem(i);
15923            TExpression key = item.getSortKey();
15924            if (key == null) {
15925                throw new SemanticIRBuildException(
15926                        Diagnostic.error(DiagnosticCode.WINDOW_OVER_ORDER_BY_NULL_SORT_KEY,
15927                        "result column " + rc + " has OVER ORDER BY item with "
15928                                + "null sort key", rc));
15929            }
15930            EExpressionType t = key.getExpressionType();
15931            if (t == EExpressionType.simple_constant_t) {
15932                // Catches both ordinal sort keys and string-literal sort keys.
15933                throw new SemanticIRBuildException(
15934                        Diagnostic.error(DiagnosticCode.WINDOW_OVER_ORDER_BY_LITERAL,
15935                        "result column " + rc + " has OVER ORDER BY literal/ordinal '"
15936                                + key + "'; not supported yet", rc));
15937            }
15938            if (t == EExpressionType.subquery_t || key.getSubQuery() != null) {
15939                throw new SemanticIRBuildException(
15940                        Diagnostic.error(DiagnosticCode.WINDOW_OVER_ORDER_BY_SUBQUERY,
15941                        "result column " + rc + " has OVER ORDER BY containing a "
15942                                + "subquery; not supported yet", rc));
15943            }
15944            if (t == EExpressionType.function_t) {
15945                TFunctionCall innerFn = key.getFunctionCall();
15946                if (innerFn != null && innerFn.getWindowDef() != null) {
15947                    throw new SemanticIRBuildException(
15948                            Diagnostic.error(DiagnosticCode.WINDOW_OVER_ORDER_BY_WINDOW_FUNCTION,
15949                            "result column " + rc + " has OVER ORDER BY containing a "
15950                                    + "window function; not supported yet", rc));
15951                }
15952                throw new SemanticIRBuildException(
15953                        Diagnostic.error(DiagnosticCode.WINDOW_OVER_ORDER_BY_AGGREGATE,
15954                        "result column " + rc + " has OVER ORDER BY containing a "
15955                                + "function call '" + key + "'; not supported yet", rc));
15956            }
15957            if (t != EExpressionType.simple_object_name_t) {
15958                throw new SemanticIRBuildException(
15959                        Diagnostic.error(DiagnosticCode.WINDOW_OVER_ORDER_BY_UNKNOWN_REFERENCE,
15960                        "result column " + rc + " has OVER ORDER BY using an "
15961                                + "unsupported expression shape (" + t + "): " + key, rc));
15962            }
15963            // NOTE: Oracle's parser DOES retype OVER ORDER BY refs to
15964            // column_alias when they match a SELECT alias (mirrors
15965            // slice-9 outer ORDER BY behaviour). The defensive
15966            // column_alias guard from PARTITION BY is intentionally
15967            // omitted here — `collectColumnRefs` already skips
15968            // column_alias-typed nodes, and the empty-refs guard below
15969            // catches the resulting unresolvable item with a clear
15970            // message. Outer ORDER BY aliases use the same path.
15971            //
15972            // Slice 19: defensive symmetry with PARTITION BY. A future
15973            // vendor whose parser does NOT retype OVER ORDER BY refs to
15974            // column_alias would land here as `simple_object_name_t`
15975            // with an inferred-from-usage resolution; the discriminator
15976            // catches that case before collectColumnRefs descends. As of
15977            // slice 19, every supported vendor retypes (probe in
15978            // §14.21), so this branch is unreachable in current tests
15979            // — kept for forward-compat.
15980            TObjectName on = key.getObjectOperand();
15981            if (on != null && provider.isCalculatedProjectionAliasFallback(on, enclosingSelect)) {
15982                throw new SemanticIRBuildException(
15983                        Diagnostic.error(DiagnosticCode.WINDOW_OVER_ORDER_BY_CALCULATED_ALIAS,
15984                        "result column " + rc + " has OVER ORDER BY referencing a "
15985                                + "SELECT-list alias on a calculated expression ('" + key
15986                                + "'); not supported yet — requires schema metadata to "
15987                                + "discriminate alias from base column", rc));
15988            }
15989            List<ColumnRef> built = collectColumnRefs(key, provider);
15990            if (built.isEmpty()) {
15991                throw new SemanticIRBuildException(
15992                        Diagnostic.error(DiagnosticCode.WINDOW_OVER_ORDER_BY_ITEM_UNUSABLE,
15993                        "result column " + rc + " has OVER ORDER BY item '"
15994                                + key + "' with no resolvable column refs", rc));
15995            }
15996            refs.addAll(built);
15997        }
15998        return new ArrayList<>(refs);
15999    }
16000
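    /*
     * NOTE(review): detached comment. No declaration follows it — the
     * next element is the buildReturningColumns Javadoc — so it is kept
     * as a plain block comment instead of a dangling Javadoc. It appears
     * to describe rejectWindowFunctionInScope, which is defined elsewhere
     * in this file; confirm and move it next to that method.
     *
     * Slice 13: reject any window function ({@code FUNC(...) OVER (...)})
     * appearing in a {@link TParseTreeNode} subtree. Used by the
     * WHERE / GROUP BY / JOIN ON guards before the visitor would
     * otherwise descend into the OVER clause and leak PARTITION BY /
     * OVER ORDER BY refs into the wrong column-ref bucket. Mirrors
     * {@link #rejectHavingWindowFunction} (slice 10) and
     * {@link #rejectOrderByWindowFunction} (slice 9).
     */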
16010    /**
16011     * Slice 85 — admit RETURNING (PG / Oracle) and OUTPUT (SQL Server)
16012     * projections on INSERT / UPDATE / DELETE statements. Returns the
16013     * list of {@link OutputColumn}s for the {@code returningColumns}
16014     * slot on the DML's {@link StatementGraph}, and appends one
16015     * {@link LineageEdge} per source column ref to {@code lineage}:
16016     * <pre>
16017     *   from = LineageRef.statementOutput(dmlIdx, returningColumns[i].name)
16018     *   to   = LineageRef.tableColumn(targetQName, sourceColumnName)
16019     * </pre>
16020     * (consumer ← producer direction; mirrors slice-78 INSERT's
16021     * {@code target ← source} convention but with the DML's own output
16022     * as the consumer and the target table's column as the producer.)
16023     *
16024     * <p>At most one of {@code ret} and {@code out} is non-null. When
16025     * both are null (no RETURNING / OUTPUT clause), returns an empty
16026     * list and emits no edges.
16027     *
16028     * <p>Reject ordering (codex round-3 Q2 BLOCKING fix — two-pass):
16029     * <ol>
16030     *   <li>Pass 1, statement-level: empty projection list →
16031     *       {@link DiagnosticCode#RETURNING_EMPTY_PROJECTION}.</li>
16032     *   <li>Pass 1.5, OUTPUT-only DML-kind / pseudo-table mismatch scan:
16033     *       INSERT with any {@code DELETED.col} →
16034     *       {@link DiagnosticCode#OUTPUT_DELETED_ON_INSERT_NOT_SUPPORTED};
16035     *       DELETE with any {@code INSERTED.col} →
16036     *       {@link DiagnosticCode#OUTPUT_INSERTED_ON_DELETE_NOT_SUPPORTED}.
16037     *       Fires on the first matching column regardless of position.</li>
16038     *   <li>Pass 2, per-column (in SQL declaration order):
16039     *     <ul>
16040     *       <li>{@code *} → {@link DiagnosticCode#RETURNING_STAR_NOT_SUPPORTED}</li>
16041     *       <li>any subquery →
16042     *           {@link DiagnosticCode#RETURNING_HAS_SUBQUERY_NOT_SUPPORTED}</li>
16043     *       <li>any window function over a base ref → reuses
16044     *           {@link DiagnosticCode#CLAUSE_WINDOW_FUNCTION_LEAK} via
16045     *           {@link #rejectWindowFunctionInScope}</li>
16046     *       <li>any aggregate function over a base ref →
16047     *           {@link DiagnosticCode#RETURNING_HAS_AGGREGATE_NOT_SUPPORTED}
16048     *           (aggregates are not legal in DML RETURNING / OUTPUT per
16049     *           spec — fires defensively when parser admits them)</li>
16050     *     </ul>
16051     *   </li>
16052     * </ol>
16053     *
16054     * <p>OUTPUT_INTO_NOT_SUPPORTED is rejected at the caller (before
16055     * any FROM walk / SET / WHERE processing) so multi-violation shapes
16056     * route through the cheaper structural code first.
16057     *
16058     * @param ret RETURNING clause; null when this DML uses OUTPUT or
16059     *            no projection at all
16060     * @param out OUTPUT clause; null when this DML uses RETURNING or
16061     *            no projection at all
16062     * @param dmlKind "INSERT" / "UPDATE" / "DELETE" — only relevant for
16063     *                the pseudo-table mismatch scan (UPDATE admits both
16064     *                INSERTED and DELETED; INSERT admits only INSERTED;
16065     *                DELETE admits only DELETED)
16066     * @param targetQName the target table's qualified name; used as the
16067     *                    {@code to} endpoint of every emitted LineageEdge
16068     * @param provider name-binding provider; same instance used for
16069     *                 SET RHS / WHERE / JOIN ON ref collection so
16070     *                 FROM-side relation refs (slice-82 joined UPDATE,
16071     *                 slice-84 joined DELETE) resolve correctly
16072     * @param dmlIdx the DML statement's position in
16073     *               {@link SemanticProgram#getStatements()}; used as the
16074     *               {@code statementIndex} on the {@code from} endpoint
16075     * @param lineage in/out: collected edges are appended here
16076     * @param anchor parse-tree anchor for diagnostics
16077     */
16078    private static List<OutputColumn> buildReturningColumns(
16079            TReturningClause ret,
16080            TOutputClause out,
16081            String dmlKind,
16082            String targetQName,
16083            String targetAlias,
16084            TTable targetTable,
16085            List<RelationSource> fromSideRelations,
16086            NameBindingProvider provider,
16087            int dmlIdx,
16088            List<LineageEdge> lineage,
16089            TParseTreeNode anchor) {
16090        if (ret == null && out == null) {
16091            return Collections.emptyList();
16092        }
16093        // Oracle host-variable form: `RETURNING col INTO :v` — AST shape
16094        // is columnValueList + variableList populated, resultExprList null.
16095        // Slice 88 admits it: extract column exprs from columnValueList,
16096        // discard variableList (bind sinks have no semantic IR relevance).
16097        // The still-unsupported degenerate case (resultExprList=null AND
16098        // columnValueList=null) keeps RETURNING_INTO_NOT_SUPPORTED so the
16099        // code stays declared-not-unreachable per the slice-71/72/82 precedent.
16100        boolean isOracleInto = (ret != null && ret.getResultExprList() == null
16101                && ret.getColumnValueList() != null);
16102        if (ret != null && ret.getResultExprList() == null && !isOracleInto) {
16103            throw new SemanticIRBuildException(Diagnostic.error(
16104                    DiagnosticCode.RETURNING_INTO_NOT_SUPPORTED,
16105                    "Oracle `RETURNING col INTO :host_var` with no column list "
16106                            + "is not supported; admits the standard INTO form only",
16107                    anchor));
16108        }
16109        // Extract the source column list.
16110        TResultColumnList items = null;
16111        TExpressionList intoExprs = null;
16112        if (isOracleInto) {
16113            intoExprs = ret.getColumnValueList();
16114        } else if (ret != null) {
16115            items = ret.getResultExprList();
16116        } else {
16117            items = out.getSelectItemList();
16118        }
16119        int colCount = isOracleInto
16120                ? (intoExprs == null ? 0 : intoExprs.size())
16121                : (items == null ? 0 : items.size());
16122        // Pass 1: empty projection list (defensive — the parser usually
16123        // refuses to produce an empty list, but a malformed AST should
16124        // surface a clean diagnostic).
16125        if (colCount == 0) {
16126            throw new SemanticIRBuildException(Diagnostic.error(
16127                    DiagnosticCode.RETURNING_EMPTY_PROJECTION,
16128                    dmlKind + (ret != null ? " RETURNING" : " OUTPUT")
16129                            + " clause has no projection columns",
16130                    anchor));
16131        }
16132        // Pass 1.5: OUTPUT-only DML-kind / pseudo-table mismatch scan
16133        // (codex round-1 Q4 BLOCKING — deep-walk all TObjectName leaves
16134        // so compound exprs like `OUTPUT INSERTED.a + DELETED.b` also
16135        // reject deterministically). The parser sets pseudoTableType
16136        // on the fieldAttr for SIMPLE column references but leaves
16137        // it null on the leaf TObjectNames inside compound expressions;
16138        // we detect those by checking the objectToken spelling against
16139        // "INSERTED" / "DELETED".
16140        // The Oracle INTO path skips this scan (no INSERTED/DELETED pseudo-tables).
16141        if (out != null && !isOracleInto) {
16142            final String targetAliasFinal = targetAlias;
16143            final String targetQNameFinal = targetQName;
16144            final List<RelationSource> relsFinal = fromSideRelations;
16145            for (int i = 0; i < items.size(); i++) {
16146                TResultColumn rc = items.getResultColumn(i);
16147                final String dmlKindFinal = dmlKind;
16148                final TResultColumn rcFinal = rc;
16149                scanOutputPseudoTableLeaves(rc.getExpr(),
16150                        new TParseTreeVisitor() {
16151                            @Override
16152                            public void preVisit(TObjectName n) {
16153                                EPseudoTableType pt = detectPseudoTable(
16154                                        n, rcFinal, targetAliasFinal,
16155                                        targetQNameFinal, relsFinal);
16156                                if (pt == EPseudoTableType.deleted
16157                                        && "INSERT".equals(dmlKindFinal)) {
16158                                    throw new SemanticIRBuildException(Diagnostic.error(
16159                                            DiagnosticCode.OUTPUT_DELETED_ON_INSERT_NOT_SUPPORTED,
16160                                            "INSERT OUTPUT references DELETED."
16161                                                    + bareColumnNameOf(n)
16162                                                    + " but there is no deleted-row "
16163                                                    + "image on INSERT; use INSERTED.* instead",
16164                                            rcFinal));
16165                                }
16166                                if (pt == EPseudoTableType.inserted
16167                                        && "DELETE".equals(dmlKindFinal)) {
16168                                    throw new SemanticIRBuildException(Diagnostic.error(
16169                                            DiagnosticCode.OUTPUT_INSERTED_ON_DELETE_NOT_SUPPORTED,
16170                                            "DELETE OUTPUT references INSERTED."
16171                                                    + bareColumnNameOf(n)
16172                                                    + " but there is no inserted-row "
16173                                                    + "image on DELETE; use DELETED.* instead",
16174                                            rcFinal));
16175                                }
16176                            }
16177                        });
16178            }
16179        }
16180        // Pass 2: per-column. Build OutputColumns, emit edges.
16181        List<OutputColumn> outputs = new ArrayList<>(colCount);
16182        for (int i = 0; i < colCount; i++) {
16183            // For the Oracle INTO path rc is null — the INTO column list
16184            // carries bare expressions, not TResultColumn wrappers.
16185            TResultColumn rc = isOracleInto ? null
16186                    : items.getResultColumn(i);
16187            TExpression expr = isOracleInto
16188                    ? intoExprs.getExpression(i)
16189                    : (rc == null ? null : rc.getExpr());
16190            if (expr == null) {
16191                throw new SemanticIRBuildException(Diagnostic.error(
16192                        DiagnosticCode.RESULT_COLUMN_NULL_EXPRESSION,
16193                        dmlKind + (ret != null ? " RETURNING" : " OUTPUT")
16194                                + " column #" + (i + 1) + " has no expression",
16195                        rc != null ? rc : anchor));
16196            }
16197            // Slice 98 — MSSQL MERGE OUTPUT `$action` pseudo-column.
16198            // Returns the merge action string per output row ('INSERT' /
16199            // 'UPDATE' / 'DELETE') — it has no underlying base column.
16200            // Detected case-insensitively because parser tokens come out
16201            // as `$action` regardless of how the user wrote it; bracketed
16202            // `[$action]` is a delimited identifier and is NOT treated
16203            // as the pseudo-column (codex Q1 confirmed YES — slice-98
16204            // detection is literal text equality on the un-bracketed
16205            // spelling). The check is gated on dmlKind="MERGE" so
16206            // INSERT/UPDATE/DELETE OUTPUT (slice 85) are unaffected.
16207            if ("MERGE".equals(dmlKind)
16208                    && isMergeActionPseudoColumn(expr)) {
16209                String actionName = (rc != null && rc.getColumnAlias() != null
16210                        && !rc.getColumnAlias().toString().isEmpty())
16211                        ? rc.getColumnAlias().toString()
16212                        : expr.toString();
16213                outputs.add(new OutputColumn(actionName,
16214                        /*derived=*/ true,
16215                        /*aggregate=*/ false,
16216                        Collections.<ColumnRef>emptyList()));
16217                // No LineageEdge — $action has no producer column.
16218                continue;
16219            }
16220            // STAR check — bare `RETURNING *` parses as
16221            // simple_object_name_t with toString="*"; qualified star
16222            // forms like `RETURNING t.*` / `OUTPUT inserted.*` /
16223            // `OUTPUT deleted.*` parse as simple_object_name_t with
16224            // partToken (and getColumnNameOnly()) equal to "*"
16225            // (codex round-4 BLOCKING fix).
16226            // Slice 90: standard RETURNING star attempts catalog-backed expansion.
16227            // Slice 99: MSSQL MERGE OUTPUT INSERTED.* / DELETED.*
16228            //   attempts catalog-backed expansion against the target table.
16229            // Oracle INTO star and non-MERGE OUTPUT star (and bare /
16230            // target-alias / source-alias MERGE OUTPUT star) remain rejected.
16231            if (isStarReference(expr)) {
16232                if (isOracleInto) {
16233                    // Oracle INTO star: keep existing reject.
16234                    throw new SemanticIRBuildException(Diagnostic.error(
16235                            DiagnosticCode.RETURNING_STAR_NOT_SUPPORTED,
16236                            dmlKind + " RETURNING INTO * star expansion "
16237                                    + "is not yet supported; use explicit column names",
16238                            rc != null ? rc : expr));
16239                }
16240                if (out != null) {
16241                    // Slice 99 / Slice 100 — MSSQL pseudo-table
16242                    // OUTPUT INSERTED.* / DELETED.* routes to catalog-
16243                    // backed expansion against the target table. The
16244                    // pseudo-table discriminator is the parser-set
16245                    // EPseudoTableType.inserted / .deleted flag on the
16246                    // star qualifier (slice-85 primary discriminator).
16247                    // Slice 99 lifted the reject for dmlKind="MERGE";
16248                    // slice 100 generalises to all DML kinds (INSERT /
16249                    // UPDATE / DELETE) — the parser sets pseudoTableType
16250                    // identically on non-MERGE OUTPUT stars, and Pass
16251                    // 1.5 has already rejected cross-direction
16252                    // mismatches (INSERT OUTPUT DELETED.* /
16253                    // DELETE OUTPUT INSERTED.*) before this branch.
16254                    // OUTPUT *, t.*, s.* (no pseudo-table marker) still
16255                    // reject — they're either ambiguous (bare *) or
16256                    // refer to non-pseudo relations.
16257                    EPseudoTableType pseudo = EPseudoTableType.none;
16258                    TObjectName starObj = expr.getObjectOperand();
16259                    if (starObj != null
16260                            && starObj.getPseudoTableType() != null) {
16261                        pseudo = starObj.getPseudoTableType();
16262                    }
16263                    if (pseudo == EPseudoTableType.inserted
16264                            || pseudo == EPseudoTableType.deleted) {
16265                        expandOutputPseudoTableStarColumns(
16266                                expr, rc, pseudo, dmlKind,
16267                                targetTable, targetQName,
16268                                provider, dmlIdx, lineage, anchor, outputs);
16269                        continue;
16270                    }
16271                    // Non-pseudo OUTPUT star (bare *, target-alias *,
16272                    // source-alias *): keep existing reject.
16273                    throw new SemanticIRBuildException(Diagnostic.error(
16274                            DiagnosticCode.RETURNING_STAR_NOT_SUPPORTED,
16275                            dmlKind + " OUTPUT * star expansion is not "
16276                                    + "yet supported; use explicit column names",
16277                            rc != null ? rc : expr));
16278                }
16279                // Standard RETURNING star: attempt catalog-backed expansion.
16280                // On success, the helper adds to `outputs` and `lineage` in place
16281                // and we `continue` past the normal single-column build below.
16282                expandReturningStarColumns(
16283                        expr, rc, dmlKind, targetTable, targetAlias, targetQName,
16284                        fromSideRelations, provider, dmlIdx, lineage, anchor, outputs);
16285                continue;
16286            }
16287            // Subquery / aggregate / window — guarded by !isOracleInto
16288            // because Oracle's INTO column list forbids nested queries and
16289            // aggregates at the grammar level; skip the checks to avoid
16290            // false rejects on unusual AST shapes.
16291            if (!isOracleInto) {
16292                if (containsAnySubqueryExpression(expr)) {
16293                    throw new SemanticIRBuildException(Diagnostic.error(
16294                            DiagnosticCode.RETURNING_HAS_SUBQUERY_NOT_SUPPORTED,
16295                            dmlKind + " " + (ret != null ? "RETURNING" : "OUTPUT")
16296                                    + " column #" + (i + 1) + " contains a subquery; "
16297                                    + "slice 85 admits scalar expressions over base columns only",
16298                            rc));
16299                }
16300                rejectWindowFunctionInScope(expr,
16301                        dmlKind + " " + (ret != null ? "RETURNING" : "OUTPUT"));
16302                if (isAggregateFunction(expr)) {
16303                    throw new SemanticIRBuildException(Diagnostic.error(
16304                            DiagnosticCode.RETURNING_HAS_AGGREGATE_NOT_SUPPORTED,
16305                            dmlKind + " " + (ret != null ? "RETURNING" : "OUTPUT")
16306                                    + " column #" + (i + 1) + " contains an aggregate "
16307                                    + "function; aggregates are not legal in DML "
16308                                    + "RETURNING / OUTPUT projection per SQL spec",
16309                            rc));
16310                }
16311            }
16312            // Name extraction.
16313            // INTO path: no alias possible, use expr.toString() directly.
16314            // Normal path: use the projection-side helper which strips
16315            // INSERTED./DELETED. qualifiers on OUTPUT pseudo-table refs.
16316            String outName = isOracleInto
16317                    ? expr.toString()
16318                    : returningOutputName(rc, expr, dmlKind, ret != null);
16319            if (outName == null || outName.isEmpty()) {
16320                throw new SemanticIRBuildException(Diagnostic.error(
16321                        DiagnosticCode.RESULT_COLUMN_NO_NAME,
16322                        dmlKind + " RETURNING INTO column #" + (i + 1)
16323                                + " has no resolvable name",
16324                        anchor));
16325            }
16326            // Source collection via manual walker (slice-89 fix registers
16327            // RETURNING refs in Resolver2 allColumnReferences for DELETE/UPDATE;
16328            // INSERT RETURNING lacks an InsertScope so Resolver2 path is partial).
16329            // rc=null is safe for the INTO path: synthRefForReturningLeaf
16330            // only dereferences rc inside the `if (isOutput)` guard,
16331            // which is false for all RETURNING (non-OUTPUT) paths.
16332            List<ColumnRef> sources = collectReturningSourceRefs(
16333                    expr, rc, out != null && !isOracleInto,
16334                    targetAlias, targetQName, fromSideRelations);
16335            boolean derived = expr.getExpressionType()
16336                    != EExpressionType.simple_object_name_t;
16337            outputs.add(new OutputColumn(outName, derived,
16338                    /*aggregate=*/ false, sources));
16339            // Emit one LineageEdge per source column ref.
16340            // Edge direction (consumer ← producer; slice-85 convention
16341            // documented on getReturningColumns()):
16342            //   from = STATEMENT_OUTPUT(dmlIdx, returningName)
16343            //   to   = TABLE_COLUMN(<producer-qualified-name>, <colName>)
16344            // The producer qualified-name is:
16345            //   - target table qname when the source ref's relationAlias
16346            //     is INSERTED / DELETED (MSSQL OUTPUT pseudo-tables both
16347            //     ultimately reference the physical target row)
16348            //   - target table qname when the source ref's relationAlias
16349            //     matches the target alias
16350            //   - FROM-side relation's binding qualifiedName when the
16351            //     ref's relationAlias matches a FROM-side relation
16352            //   - the relationAlias verbatim otherwise (defensive)
16353            for (ColumnRef src : sources) {
16354                String srcCol = src.getColumnName();
16355                if (srcCol == null || srcCol.isEmpty()) continue;
16356                String alias = src.getRelationAlias();
16357                String to = resolveReturningEdgeTarget(alias,
16358                        targetAlias, targetQName, fromSideRelations);
16359                lineage.add(new LineageEdge(
16360                        LineageRef.statementOutput(dmlIdx, outName),
16361                        LineageRef.tableColumn(to, srcCol)));
16362            }
16363        }
16364        return outputs;
16365    }
16366
16367    /**
16368     * Slice 98 helper — true when an expression is the MSSQL MERGE
16369     * OUTPUT {@code $action} pseudo-column.
16370     *
16371     * <p>Detection rule: the expression is a
16372     * {@code simple_object_name_t} and its {@code toString()} matches
16373     * {@code "$action"} case-insensitively. Bracketed delimited
16374     * identifiers like {@code [$action]} parse with the brackets in
16375     * the token string, so they are NOT matched here — a column
16376     * actually named {@code $action} (delimited) is treated as a
16377     * normal target column, not the pseudo-column (codex Q1 confirmed).
16378     *
16379     * <p>Caller gates the check on {@code dmlKind == "MERGE"} so the
16380     * slice-85 INSERT/UPDATE/DELETE OUTPUT path is unaffected.
16381     */
16382    private static boolean isMergeActionPseudoColumn(TExpression expr) {
16383        if (expr == null) return false;
16384        if (expr.getExpressionType() != EExpressionType.simple_object_name_t) {
16385            return false;
16386        }
16387        String text = expr.toString();
16388        return text != null && "$action".equalsIgnoreCase(text);
16389    }
16390
16391    /**
16392     * Slice 85 helper — true when an expression is a star reference
16393     * (bare {@code *} or qualified {@code t.*} / {@code INSERTED.*} /
16394     * {@code DELETED.*}). Covers both forms: bare star has
16395     * {@code expr.toString()=="*"}; qualified star is a
16396     * simple_object_name_t whose leaf TObjectName has
16397     * {@code partToken=="*"} (codex round-4 BLOCKING fix —
16398     * qualified stars were previously slipping past the bare-only
16399     * check and producing bogus ColumnRefs).
16400     */
16401    private static boolean isStarReference(TExpression expr) {
16402        if (expr == null) return false;
16403        if (expr.getExpressionType() != EExpressionType.simple_object_name_t) {
16404            return false;
16405        }
16406        if ("*".equals(expr.toString())) return true;
16407        TObjectName n = expr.getObjectOperand();
16408        if (n == null) return false;
16409        if (n.getPartToken() != null && "*".equals(n.getPartToken().toString())) {
16410            return true;
16411        }
16412        String colOnly = n.getColumnNameOnly();
16413        return colOnly != null && "*".equals(colOnly);
16414    }
16415
16416    /**
16417     * Slice 90 helper — expand a standard {@code RETURNING *} /
16418     * {@code RETURNING t.*} star into per-column
16419     * {@link OutputColumn} entries using catalog metadata from
16420     * {@link #lookupRelationColumnNames(TTable, NameBindingProvider)}.
16421     *
16422     * <p>Called only for standard PG/Oracle RETURNING (not MSSQL OUTPUT,
16423     * not Oracle INTO). Adds expanded columns to {@code outputs} and
16424     * matching {@link LineageEdge}s to {@code lineage} in place.
16425     *
16426     * <p>Qualifier matching mirrors Slice 59 SELECT star semantics:
16427     * alias-only — the qualifier must equal the target's effective alias,
16428     * not the schema-qualified name. For INSERT without alias the effective
16429     * alias is the bare table name, so {@code RETURNING employees.*} matches
16430     * {@code INSERT INTO schema.employees}.
16431     *
16432     * <p>Throws {@link SemanticIRBuildException} on any failure:
16433     * <ul>
16434     *   <li>{@link DiagnosticCode#RETURNING_STAR_CATALOG_REQUIRED} — no
16435     *       catalog metadata available for the target relation;</li>
16436     *   <li>{@link DiagnosticCode#RETURNING_STAR_NOT_SUPPORTED} — the
16437     *       qualifier matches a FROM-side relation alias but FROM-side
16438     *       star expansion is deferred to a future slice;</li>
16439     *   <li>{@link DiagnosticCode#RETURNING_STAR_QUALIFIER_UNKNOWN} — the
16440     *       qualifier does not match the target alias or any FROM-side
16441     *       relation alias.</li>
16442     * </ul>
16443     */
16444    private static void expandReturningStarColumns(
16445            TExpression expr,
16446            TResultColumn rc,
16447            String dmlKind,
16448            TTable targetTable,
16449            String targetAlias,
16450            String targetQName,
16451            List<RelationSource> fromSideRelations,
16452            NameBindingProvider provider,
16453            int dmlIdx,
16454            List<LineageEdge> lineage,
16455            TParseTreeNode anchor,
16456            List<OutputColumn> outputs) {
16457        // Extract qualifier: empty for bare `*`, table/alias name for `t.*`.
16458        String qualifier = "";
16459        TObjectName n = (expr != null) ? expr.getObjectOperand() : null;
16460        if (n != null) {
16461            String q = n.getTableString();
16462            if (q != null && !q.isEmpty()) qualifier = q;
16463        }
16464        // Rendered star form for use in diagnostic messages.
16465        String starForm = qualifier.isEmpty() ? "*" : qualifier + ".*";
16466        // Determine which relation to expand.
16467        // Rule (Slice 90, mirrors Slice 59 correlation-name semantics):
16468        //   effective alias only — bare name without alias counts as alias.
16469        boolean matchesTarget = qualifier.isEmpty()
16470                || qualifier.equalsIgnoreCase(targetAlias);
16471        if (matchesTarget) {
16472            // Attempt catalog-backed expansion of the target table.
16473            List<String> cols = lookupRelationColumnNames(targetTable, provider);
16474            if (cols == null || cols.isEmpty()) {
16475                throw new SemanticIRBuildException(Diagnostic.error(
16476                        DiagnosticCode.RETURNING_STAR_CATALOG_REQUIRED,
16477                        dmlKind + " RETURNING " + starForm
16478                                + " requires catalog metadata for target '"
16479                                + targetQName + "' to expand; supply a Catalog via "
16480                                + "SqlSemanticAnalyzer.analyze(sql, vendor, catalog)",
16481                        rc != null ? rc : anchor));
16482            }
16483            for (String colName : cols) {
16484                ColumnRef ref = new ColumnRef(targetAlias, colName);
16485                outputs.add(new OutputColumn(colName, /*derived=*/ false,
16486                        /*aggregate=*/ false, Collections.singletonList(ref)));
16487                lineage.add(new LineageEdge(
16488                        LineageRef.statementOutput(dmlIdx, colName),
16489                        LineageRef.tableColumn(targetQName, colName)));
16490            }
16491            return;
16492        }
16493        // Qualifier doesn't match target. Check FROM-side relations.
16494        for (RelationSource rs : fromSideRelations) {
16495            if (qualifier.equalsIgnoreCase(rs.getAlias())) {
16496                // Known FROM-side relation, but expansion is deferred.
16497                throw new SemanticIRBuildException(Diagnostic.error(
16498                        DiagnosticCode.RETURNING_STAR_NOT_SUPPORTED,
16499                        dmlKind + " RETURNING " + starForm + " — "
16500                                + "star expansion for FROM-side/USING relations "
16501                                + "is deferred to a future slice; use explicit "
16502                                + "column names for FROM-side RETURNING refs",
16503                        rc != null ? rc : anchor));
16504            }
16505        }
16506        // Qualifier is truly unknown (doesn't match target or any FROM-side relation).
16507        throw new SemanticIRBuildException(Diagnostic.error(
16508                DiagnosticCode.RETURNING_STAR_QUALIFIER_UNKNOWN,
16509                dmlKind + " RETURNING " + starForm + " — qualifier '"
16510                        + qualifier + "' does not match the DML target alias '"
16511                        + targetAlias + "' or any FROM-side relation; "
16512                        + "use the target's effective alias for RETURNING star expansion",
16513                rc != null ? rc : anchor));
16514    }
16515
16516    /**
16517     * Slice 99 / Slice 100 helper — expand MSSQL pseudo-table
16518     * {@code OUTPUT INSERTED.*} / {@code OUTPUT DELETED.*} into
16519     * per-column {@link OutputColumn} entries using catalog metadata
16520     * from {@link #lookupRelationColumnNames(TTable, NameBindingProvider)}.
16521     *
16522     * <p>Slice 99 originally introduced this helper for MSSQL MERGE
16523     * OUTPUT. Slice 100 generalised it to all DML kinds (INSERT / UPDATE
16524     * / DELETE / MERGE): the parser sets {@code pseudoTableType=inserted/deleted}
16525     * on the star qualifier identically for non-MERGE DML, so the
16526     * expansion is mechanically the same — only the catalog-missing
16527     * message text varies by {@code dmlKind} per the slice-80
16528     * message-text-discrimination contract.
16529     *
16530     * <p>Mirrors the slice-90 standard-RETURNING star design with two
16531     * differences:
16532     * <ul>
16533     *   <li>The pseudo-table qualifier ({@code INSERTED} / {@code DELETED})
16534     *       is normalized to UPPERCASE on
16535     *       {@link ColumnRef#getRelationAlias()} regardless of the SQL
16536     *       case, matching slice-85
16537     *       {@code synthRefForReturningLeaf}'s
16538     *       {@code new ColumnRef("INSERTED", ...)} convention.</li>
16539     *   <li>The catalog lookup target is the DML target table (the
16540     *       pseudo-table rows physically reference target rows), not a
16541     *       FROM-side relation.</li>
16542     * </ul>
16543     *
16544     * <p>Catalog miss reuses {@link DiagnosticCode#RETURNING_STAR_CATALOG_REQUIRED}
16545     * (slice-90 code) with a discriminating message text formatted as
16546     * {@code "<dmlKind> OUTPUT <pseudoLabel>.* requires catalog metadata
16547     * for target '<qname>' ..."}.
16548     *
16549     * <p>Adds expanded columns to {@code outputs} and matching
16550     * {@link LineageEdge}s to {@code lineage} in place.
16551     *
16552     * @param dmlKind one of {@code "INSERT"}, {@code "UPDATE"},
16553     *                {@code "DELETE"}, {@code "MERGE"} — feeds the
16554     *                catalog-missing message text only; expansion is
16555     *                identical regardless of kind because the parser
16556     *                sets {@code pseudoTableType} on the star qualifier
16557     *                uniformly.
16558     */
16559    private static void expandOutputPseudoTableStarColumns(
16560            TExpression expr,
16561            TResultColumn rc,
16562            EPseudoTableType pseudoTable,
16563            String dmlKind,
16564            TTable targetTable,
16565            String targetQName,
16566            NameBindingProvider provider,
16567            int dmlIdx,
16568            List<LineageEdge> lineage,
16569            TParseTreeNode anchor,
16570            List<OutputColumn> outputs) {
16571        // Slice-85 convention: relationAlias is normalized to UPPERCASE.
16572        String pseudoLabel = (pseudoTable == EPseudoTableType.inserted)
16573                ? "INSERTED"
16574                : "DELETED";
16575        List<String> cols = lookupRelationColumnNames(targetTable, provider);
16576        if (cols == null || cols.isEmpty()) {
16577            throw new SemanticIRBuildException(Diagnostic.error(
16578                    DiagnosticCode.RETURNING_STAR_CATALOG_REQUIRED,
16579                    dmlKind + " OUTPUT " + pseudoLabel + ".* requires catalog "
16580                            + "metadata for target '" + targetQName
16581                            + "' to expand; supply a Catalog via "
16582                            + "SqlSemanticAnalyzer.analyze(sql, vendor, catalog)",
16583                    rc != null ? rc : anchor));
16584        }
16585        for (String colName : cols) {
16586            ColumnRef ref = new ColumnRef(pseudoLabel, colName);
16587            outputs.add(new OutputColumn(colName, /*derived=*/ false,
16588                    /*aggregate=*/ false, Collections.singletonList(ref)));
16589            // INSERTED / DELETED both physically reference the target
16590            // row image; lineage edge target is the target qname.
16591            lineage.add(new LineageEdge(
16592                    LineageRef.statementOutput(dmlIdx, colName),
16593                    LineageRef.tableColumn(targetQName, colName)));
16594        }
16595    }
16596
16597    /**
16598     * Slice 85 helper — pull the pseudo-table type for an OUTPUT
16599     * column (SQL Server). Returns {@link EPseudoTableType#none} for
16600     * RETURNING (PG / Oracle) columns and for OUTPUT columns whose
16601     * top-level expression doesn't carry an INSERTED / DELETED
16602     * qualifier. For compound expressions, the parser sets the
16603     * pseudo-table type on the fieldAttr only for SIMPLE column
16604     * references; compound shapes must be deep-walked separately
16605     * via {@link #scanOutputPseudoTableLeaves}.
16606     */
16607    private static EPseudoTableType pseudoTableOf(TResultColumn rc) {
16608        if (rc == null) return EPseudoTableType.none;
16609        TObjectName fa = rc.getFieldAttr();
16610        if (fa == null) return EPseudoTableType.none;
16611        EPseudoTableType pt = fa.getPseudoTableType();
16612        return (pt == null) ? EPseudoTableType.none : pt;
16613    }
16614
16615    /**
16616     * Slice 85 helper — walk an OUTPUT projection expression and
16617     * invoke {@code visitor} on every TObjectName leaf for the
16618     * pseudo-table mismatch scan. Skips function-name TObjectNames
16619     * via the dbObjectType filter so {@code OUTPUT FUNC(inserted.x)}
16620     * still surfaces the leaf inserted.x ref.
16621     */
16622    private static void scanOutputPseudoTableLeaves(
16623            TExpression expr, final TParseTreeVisitor visitor) {
16624        if (expr == null) return;
16625        // Collect function-name identities first (codex round-2 Q1
16626        // BLOCKING — dialect-portable structural filter).
16627        final java.util.Set<TObjectName> fnLeaves =
16628                collectFunctionNameLeaves(expr);
16629        // Fast path: leaf simple_object_name_t.
16630        if (expr.getExpressionType() == EExpressionType.simple_object_name_t) {
16631            TObjectName n = expr.getObjectOperand();
16632            if (n != null && !isFunctionNameObjectName(n, fnLeaves)) {
16633                visitor.preVisit(n);
16634            }
16635            return;
16636        }
16637        // Compound expression — walk for TObjectName leaves.
16638        expr.acceptChildren(new TParseTreeVisitor() {
16639            int nestedSelectDepth = 0;
16640            @Override
16641            public void preVisit(TSelectSqlStatement s) { nestedSelectDepth++; }
16642            @Override
16643            public void postVisit(TSelectSqlStatement s) { nestedSelectDepth--; }
16644            @Override
16645            public void preVisit(TObjectName node) {
16646                if (nestedSelectDepth > 0) return;
16647                if (isFunctionNameObjectName(node, fnLeaves)) return;
16648                visitor.preVisit(node);
16649            }
16650        });
16651    }
16652
16653    /**
16654     * Slice 85 helper — detect the pseudo-table type (INSERTED /
16655     * DELETED) for a TObjectName leaf in an OUTPUT projection,
16656     * honouring both the parser-set fieldAttr.pseudoTableType (for
16657     * simple leaf refs where the parser ran its qualifier swap) AND
16658     * the objectToken spelling (for compound expressions where the
16659     * parser left pseudoTableType=none on leaf TObjectNames).
16660     *
16661     * <p>Codex round-2 Q3 BLOCKING fix — MSSQL permits "INSERTED" /
16662     * "DELETED" as real identifiers via ColId, so the text-match
16663     * fallback fires ONLY when no FROM-side relation alias /
16664     * qualifiedName / bare-component matches the qualifier. With a
16665     * real table named "INSERTED" in scope, the text-match is
16666     * suppressed and the leaf surfaces as a normal column ref.
16667     */
16668    private static EPseudoTableType detectPseudoTable(TObjectName n,
16669                                                       TResultColumn rc,
16670                                                       String targetAlias,
16671                                                       String targetQName,
16672                                                       List<RelationSource> fromSideRelations) {
16673        if (n == null) return EPseudoTableType.none;
16674        // Direct: parser-set pseudoTableType.
16675        if (n.getPseudoTableType() != null
16676                && n.getPseudoTableType() != EPseudoTableType.none) {
16677            return n.getPseudoTableType();
16678        }
16679        // Indirect: if this leaf is the result column's fieldAttr,
16680        // read from the fieldAttr's pseudoTableType (slice-78
16681        // TOutputClause.doParse sets it there for simple refs).
16682        if (rc != null && rc.getFieldAttr() == n) {
16683            EPseudoTableType pt = pseudoTableOf(rc);
16684            if (pt != null && pt != EPseudoTableType.none) return pt;
16685        }
16686        // Compound expression leaf — the parser leaves
16687        // pseudoTableType=none on the leaf TObjectNames but the
16688        // objectToken spelling is preserved. Text-match
16689        // INSERTED / DELETED case-insensitively, but only when no
16690        // real FROM-side relation shadows the pseudo name (codex
16691        // round-2 Q3 BLOCKING).
16692        if (n.getObjectToken() != null && n.getPartToken() != null) {
16693            String obj = n.getObjectToken().toString();
16694            if (obj == null) return EPseudoTableType.none;
16695            boolean shadowedByRealRelation =
16696                    qualifierMatchesAnyRelation(obj, targetAlias,
16697                            targetQName, fromSideRelations);
16698            if (shadowedByRealRelation) return EPseudoTableType.none;
16699            if ("INSERTED".equalsIgnoreCase(obj)) return EPseudoTableType.inserted;
16700            if ("DELETED".equalsIgnoreCase(obj)) return EPseudoTableType.deleted;
16701        }
16702        return EPseudoTableType.none;
16703    }
16704
16705    /**
16706     * Slice 85 helper — true when {@code qualifier} matches some
16707     * real relation in scope (target alias / qualified name / bare
16708     * component, or any FROM-side relation alias / qualified name /
16709     * bare component). Used by {@link #detectPseudoTable} to
16710     * suppress the INSERTED / DELETED text-match when a real table
16711     * by that name is in scope.
16712     */
16713    private static boolean qualifierMatchesAnyRelation(String qualifier,
16714            String targetAlias, String targetQName,
16715            List<RelationSource> fromSideRelations) {
16716        if (qualifier == null || qualifier.isEmpty()) return false;
16717        if (targetAlias != null && targetAlias.equalsIgnoreCase(qualifier)) {
16718            return true;
16719        }
16720        if (targetQName != null
16721                && (targetQName.equalsIgnoreCase(qualifier)
16722                        || bareLastDotComponent(targetQName)
16723                                .equalsIgnoreCase(qualifier))) {
16724            return true;
16725        }
16726        if (fromSideRelations != null) {
16727            for (RelationSource rs : fromSideRelations) {
16728                String a = rs.getAlias();
16729                if (a != null && a.equalsIgnoreCase(qualifier)) return true;
16730                String qn = rs.getBinding() == null ? null
16731                        : rs.getBinding().getQualifiedName();
16732                if (qn != null
16733                        && (qn.equalsIgnoreCase(qualifier)
16734                                || bareLastDotComponent(qn)
16735                                        .equalsIgnoreCase(qualifier))) {
16736                    return true;
16737                }
16738            }
16739        }
16740        return false;
16741    }
16742
16743    /**
16744     * Slice 85 helper — collect identities of every TObjectName that
16745     * is a function-name in the given expression tree (codex round-2
16746     * Q1 BLOCKING — {@code EDbObjectType.function} is unreliable
16747     * across dialects: Oracle builtins surface as {@code constant},
16748     * MSSQL XML methods as {@code method}). The walker uses
16749     * {@link IdentityHashMap}-style reference equality so the
16750     * column-ref walker can structurally skip function-name leaves
16751     * regardless of dbType.
16752     */
16753    private static java.util.Set<TObjectName> collectFunctionNameLeaves(
16754            TExpression expr) {
16755        final java.util.Set<TObjectName> set = java.util.Collections.newSetFromMap(
16756                new IdentityHashMap<TObjectName, Boolean>());
16757        if (expr == null) return set;
16758        expr.acceptChildren(new TParseTreeVisitor() {
16759            @Override
16760            public void preVisit(TFunctionCall fn) {
16761                TObjectName name = fn.getFunctionName();
16762                if (name != null) set.add(name);
16763            }
16764        });
16765        return set;
16766    }
16767
16768    /**
16769     * Slice 85 helper — true when this TObjectName is in the
16770     * function-name set collected for the current expression
16771     * (codex round-2 Q1 BLOCKING fix — structural identity rather
16772     * than dbType). {@code functionNameLeaves} may be null (empty
16773     * set semantics) when the caller doesn't have the set in hand.
16774     */
16775    private static boolean isFunctionNameObjectName(TObjectName n,
16776            java.util.Set<TObjectName> functionNameLeaves) {
16777        if (n == null) return false;
16778        if (functionNameLeaves != null && functionNameLeaves.contains(n)) {
16779            return true;
16780        }
16781        // Best-effort fallback: dbType check catches the
16782        // single-leaf simple_object_name_t function case (rare —
16783        // those normally arrive as TFunctionCall). Kept defensively.
16784        EDbObjectType t = n.getDbObjectType();
16785        return t == EDbObjectType.function;
16786    }
16787
16788    /**
16789     * Slice 85 helper — best-effort spelling of the bare column name
16790     * for an OUTPUT pseudo-table ref like INSERTED.foo. Used only in
16791     * diagnostic message text.
16792     */
16793    private static String safePseudoColumn(TResultColumn rc) {
16794        if (rc == null) return "<unknown>";
16795        TObjectName fa = rc.getFieldAttr();
16796        if (fa == null) return "<unknown>";
16797        if (fa.getPartToken() != null) return fa.getPartToken().toString();
16798        if (fa.getPropertyToken() != null) return fa.getPropertyToken().toString();
16799        return rc.toString();
16800    }
16801
16802    /**
16803     * Slice 85 helper — derive the OutputColumn name for a RETURNING /
16804     * OUTPUT projection column. Uses the explicit alias when present,
16805     * else the bare column name (for OUTPUT INSERTED.col / DELETED.col
16806     * the bare partToken spelling, stripping the pseudo-table
16807     * qualifier). Falls back to {@code expr.toString()} for derived
16808     * expressions without alias (e.g. {@code RETURNING a + 1} → name
16809     * = "a + 1").
16810     */
16811    private static String returningOutputName(TResultColumn rc,
16812                                              TExpression expr,
16813                                              String dmlKind,
16814                                              boolean isReturning) {
16815        String alias = rc.getColumnAlias();
16816        if (alias != null && !alias.isEmpty()) {
16817            return alias;
16818        }
16819        // OUTPUT pseudo-table ref without alias — strip the qualifier
16820        // so the OutputColumn.name carries the bare column spelling.
16821        if (!isReturning) {
16822            EPseudoTableType pt = pseudoTableOf(rc);
16823            if (pt != EPseudoTableType.none) {
16824                TObjectName fa = rc.getFieldAttr();
16825                if (fa != null && fa.getPartToken() != null) {
16826                    return fa.getPartToken().toString();
16827                }
16828            }
16829        }
16830        // RETURNING bare column or derived expression — fall back to
16831        // the expression's toString(). For simple_object_name_t this is
16832        // the verbatim bare or qualified column spelling; for
16833        // arithmetic / function expressions it is the rendered text.
16834        String s = expr.toString();
16835        if (s == null || s.isEmpty()) {
16836            throw new SemanticIRBuildException(Diagnostic.error(
16837                    DiagnosticCode.RESULT_COLUMN_NO_NAME,
16838                    dmlKind + (isReturning ? " RETURNING" : " OUTPUT")
16839                            + " column has no resolvable name",
16840                    rc));
16841        }
16842        return s;
16843    }
16844
16845    /**
16846     * Slice 85 helper — collect ColumnRefs from a RETURNING / OUTPUT
16847     * projection expression. Slice 89 fixed TReturningClause.acceptChildren()
16848     * to descend into children so Resolver2 now registers RETURNING refs in
16849     * allColumnReferences for DELETE/UPDATE (INSERT RETURNING lacks InsertScope
16850     * so Resolver2 coverage there is partial). This walker remains the
16851     * authoritative source for Semantic IR because it maps qualifier tokens
16852     * directly onto the DML's known relation set:
16853     *
16854     * <ul>
16855     *   <li>OUTPUT pseudo-table ref ({@code INSERTED.col} /
16856     *       {@code DELETED.col}, detected via
16857     *       {@code fieldAttr.pseudoTableType}) → ColumnRef with
16858     *       uppercase {@code "INSERTED"} / {@code "DELETED"} as the
16859     *       relationAlias, preserving temporal phase (codex round-2
16860     *       Q2 BLOCKING).</li>
16861     *   <li>Qualified ref matching a FROM-side relation's alias
16862     *       (slice-82 joined UPDATE / slice-84 joined DELETE)
16863     *       → ColumnRef with that alias.</li>
16864     *   <li>Qualified ref matching the target table's effective alias
16865     *       or its qualified name → ColumnRef with the target alias.</li>
16866     *   <li>Unqualified ref → ColumnRef with the target alias
16867     *       (default scope).</li>
16868     *   <li>Qualified ref matching nothing known → ColumnRef with the
16869     *       parser's qualifier verbatim. Lineage consumers can spot
16870     *       the unresolved relation via the relationAlias they see.</li>
16871     * </ul>
16872     *
16873     * <p>Match policy: case-insensitive on alias / qualified name,
16874     * to match the slice-83 codex Q3 advisory and stay forgiving of
16875     * dialect-specific identifier casing.
16876     */
16877    private static List<ColumnRef> collectReturningSourceRefs(
16878            TExpression expr,
16879            TResultColumn rc,
16880            boolean isOutput,
16881            String targetAlias,
16882            String targetQName,
16883            List<RelationSource> fromSideRelations) {
16884        final LinkedHashSet<ColumnRef> refs = new LinkedHashSet<>();
16885        // Collect function-name identities first (codex round-2 Q1
16886        // BLOCKING — dialect-portable structural filter; see
16887        // {@link #collectFunctionNameLeaves} javadoc for the
16888        // dbObjectType unreliability rationale).
16889        final java.util.Set<TObjectName> fnLeaves = collectFunctionNameLeaves(expr);
16890        // Fast path: simple_object_name_t leaf — handle directly so
16891        // OUTPUT INSERTED.col / DELETED.col resolves through fieldAttr.
16892        if (expr.getExpressionType() == EExpressionType.simple_object_name_t) {
16893            ColumnRef r = synthRefForReturningLeaf(
16894                    rc, expr.getObjectOperand(), isOutput,
16895                    targetAlias, targetQName, fromSideRelations, fnLeaves);
16896            if (r != null) refs.add(r);
16897            return new ArrayList<>(refs);
16898        }
16899        // Compound expression — walk for TObjectName leaves. We don't
16900        // descend into nested subqueries (already rejected upstream)
16901        // and don't try to resolve fieldAttr for compound expressions
16902        // (pseudo-table qualifier inside arithmetic / function args is
16903        // a rare shape; slice 85 surfaces the bare column ref against
16904        // the target alias as a best-effort).
16905        expr.acceptChildren(new TParseTreeVisitor() {
16906            int nestedSelectDepth = 0;
16907            @Override
16908            public void preVisit(TSelectSqlStatement nested) {
16909                nestedSelectDepth++;
16910            }
16911            @Override
16912            public void postVisit(TSelectSqlStatement nested) {
16913                nestedSelectDepth--;
16914            }
16915            @Override
16916            public void preVisit(TObjectName node) {
16917                if (nestedSelectDepth > 0) return;
16918                // Codex round-1 Q1 / round-2 Q1 BLOCKING — skip
16919                // function-name leaves via structural identity.
16920                if (isFunctionNameObjectName(node, fnLeaves)) return;
16921                ColumnRef r = synthRefForReturningLeaf(
16922                        rc, node, isOutput,
16923                        targetAlias, targetQName, fromSideRelations, fnLeaves);
16924                if (r != null) refs.add(r);
16925            }
16926        });
16927        return new ArrayList<>(refs);
16928    }
16929
16930    /**
16931     * Slice 85 helper — build one ColumnRef for a TObjectName leaf in
16932     * a RETURNING / OUTPUT projection. Returns null when the node is
16933     * not a column-name reference (e.g. a function name token).
16934     */
16935    private static ColumnRef synthRefForReturningLeaf(
16936            TResultColumn rc,
16937            TObjectName node,
16938            boolean isOutput,
16939            String targetAlias,
16940            String targetQName,
16941            List<RelationSource> fromSideRelations,
16942            java.util.Set<TObjectName> functionNameLeaves) {
16943        if (node == null) return null;
16944        // Codex round-1 Q1 / round-2 Q1 BLOCKING — skip function-name
16945        // TObjectNames via structural identity (the "UPPER" leaf in
16946        // `RETURNING UPPER(name)`).
16947        if (isFunctionNameObjectName(node, functionNameLeaves)) return null;
16948        String colName = bareColumnNameOf(node);
16949        if (colName == null || colName.isEmpty()) return null;
16950        // OUTPUT pseudo-table ref (INSERTED / DELETED). Detect via
16951        // fieldAttr on the result column (parser surfaces it there
16952        // for simple refs) OR objectToken spelling (compound exprs;
16953        // codex round-1 Q4 BLOCKING — same deep detection as
16954        // detectPseudoTable). Both INSERTED and DELETED ultimately
16955        // reference the target table's row; only the temporal phase
16956        // surfaces on ColumnRef.relationAlias.
16957        if (isOutput) {
16958            EPseudoTableType pt = detectPseudoTable(node, rc,
16959                    targetAlias, targetQName, fromSideRelations);
16960            if (pt == EPseudoTableType.inserted) {
16961                return new ColumnRef("INSERTED", partColumnNameOf(node, colName));
16962            }
16963            if (pt == EPseudoTableType.deleted) {
16964                return new ColumnRef("DELETED", partColumnNameOf(node, colName));
16965            }
16966        }
16967        // Qualifier resolution: bare or qualified.
16968        String qualifier = qualifierOf(node);
16969        if (qualifier == null || qualifier.isEmpty()) {
16970            return new ColumnRef(targetAlias, colName);
16971        }
16972        // Codex round-2 Q2 + round-3 BLOCKING fix — single-pass
16973        // count-all-candidates matcher. A relation "matches" the
16974        // qualifier if any of (alias, qualifiedName, bare-last-dot
16975        // component of qualifiedName) compares case-insensitive-
16976        // equal. Multiple matches (e.g. unaliased `FROM s1.t s2.t`
16977        // both have effectiveAlias "t" via TTable.getName() fallback,
16978        // both have bareComponent "t") are ambiguous and fall through
16979        // to the verbatim qualifier path so consumers see the
16980        // ambiguity rather than a silent order-dependent pick.
16981        int totalMatches = 0;
16982        // Single-match accumulators (one each — only valid when
16983        // totalMatches == 1):
16984        RelationSource matchedRelation = null;
16985        boolean matchedIsTarget = false;
16986        if (fromSideRelations != null) {
16987            for (RelationSource rs : fromSideRelations) {
16988                if (relationCandidateMatch(rs, qualifier)) {
16989                    totalMatches++;
16990                    matchedRelation = rs;
16991                }
16992            }
16993        }
16994        if (targetCandidateMatch(targetAlias, targetQName, qualifier)) {
16995            totalMatches++;
16996            matchedIsTarget = true;
16997        }
16998        if (totalMatches == 1) {
16999            if (matchedIsTarget) {
17000                return new ColumnRef(
17001                        targetAlias != null ? targetAlias : targetQName, colName);
17002            }
17003            String a = matchedRelation.getAlias();
17004            String qn = matchedRelation.getBinding() == null ? null
17005                    : matchedRelation.getBinding().getQualifiedName();
17006            return new ColumnRef((a != null && !a.isEmpty()) ? a : qn, colName);
17007        }
17008        // Zero matches or ambiguous — pass through verbatim. Lineage
17009        // consumers can spot the unresolved / ambiguous relation.
17010        return new ColumnRef(qualifier, colName);
17011    }
17012
17013    /**
17014     * Slice 85 helper — true when a FROM-side relation is a match
17015     * candidate for the qualifier under any of: alias, full
17016     * qualifiedName, or bare last-dot component of qualifiedName
17017     * (case-insensitive). Caller uses {@link #synthRefForReturningLeaf}'s
17018     * single-pass count-then-pick policy to disambiguate.
17019     */
17020    private static boolean relationCandidateMatch(RelationSource rs,
17021                                                   String qualifier) {
17022        if (rs == null || qualifier == null || qualifier.isEmpty()) return false;
17023        String a = rs.getAlias();
17024        if (a != null && a.equalsIgnoreCase(qualifier)) return true;
17025        String qn = rs.getBinding() == null ? null
17026                : rs.getBinding().getQualifiedName();
17027        if (qn == null) return false;
17028        return qn.equalsIgnoreCase(qualifier)
17029                || bareLastDotComponent(qn).equalsIgnoreCase(qualifier);
17030    }
17031
17032    /**
17033     * Slice 85 helper — true when the target table is a candidate
17034     * match for the qualifier (alias / qualifiedName / bare
17035     * component, case-insensitive).
17036     */
17037    private static boolean targetCandidateMatch(String targetAlias,
17038                                                 String targetQName,
17039                                                 String qualifier) {
17040        if (qualifier == null || qualifier.isEmpty()) return false;
17041        if (targetAlias != null && targetAlias.equalsIgnoreCase(qualifier)) {
17042            return true;
17043        }
17044        if (targetQName != null
17045                && (targetQName.equalsIgnoreCase(qualifier)
17046                        || bareLastDotComponent(targetQName)
17047                                .equalsIgnoreCase(qualifier))) {
17048            return true;
17049        }
17050        return false;
17051    }
17052
17053    /**
17054     * Slice 85 helper — strip everything up to and including the
17055     * last dot in a qualified name. Returns the input unchanged when
17056     * no dot is present.
17057     */
17058    private static String bareLastDotComponent(String qname) {
17059        if (qname == null) return "";
17060        int dot = qname.lastIndexOf('.');
17061        return (dot < 0) ? qname : qname.substring(dot + 1);
17062    }
17063
17064    /**
17065     * Slice 85 helper — map a ColumnRef.relationAlias to the qualified
17066     * table name used on the LineageEdge {@code to} endpoint. INSERTED
17067     * / DELETED both collapse to the target's qualified name; FROM-side
17068     * aliases route to their bound qualifiedName; target alias / qname
17069     * stays on the target; unknown aliases pass through verbatim.
17070     */
17071    private static String resolveReturningEdgeTarget(
17072            String alias, String targetAlias, String targetQName,
17073            List<RelationSource> fromSideRelations) {
17074        if ("INSERTED".equals(alias) || "DELETED".equals(alias)) {
17075            return targetQName;
17076        }
17077        if (targetAlias != null && targetAlias.equalsIgnoreCase(alias)) {
17078            return targetQName;
17079        }
17080        if (targetQName != null && targetQName.equalsIgnoreCase(alias)) {
17081            return targetQName;
17082        }
17083        if (fromSideRelations != null) {
17084            for (RelationSource rs : fromSideRelations) {
17085                if (rs.getAlias() != null
17086                        && rs.getAlias().equalsIgnoreCase(alias)) {
17087                    String qn = rs.getBinding() == null ? null
17088                            : rs.getBinding().getQualifiedName();
17089                    return (qn != null && !qn.isEmpty()) ? qn : alias;
17090                }
17091            }
17092        }
17093        return alias != null ? alias : targetQName;
17094    }
17095
17096    /**
17097     * Slice 85 helper — extract the bare column name from a TObjectName
17098     * leaf, honouring partToken / propertyToken / objectToken in the
17099     * order set by the parser. Returns null when no column-name token
17100     * is present.
17101     */
17102    private static String bareColumnNameOf(TObjectName node) {
17103        // partToken is the column name in `qualifier.col` form;
17104        // for bare `col`, the parser may put it on objectToken.
17105        if (node.getPartToken() != null) {
17106            return node.getPartToken().toString();
17107        }
17108        if (node.getColumnNameOnly() != null
17109                && !node.getColumnNameOnly().isEmpty()) {
17110            return node.getColumnNameOnly();
17111        }
17112        if (node.getObjectToken() != null) {
17113            return node.getObjectToken().toString();
17114        }
17115        return null;
17116    }
17117
17118    /**
17119     * Slice 85 helper — qualifier (table alias or schema-table) of a
17120     * column-name reference. Returns null for bare references.
17121     */
17122    private static String qualifierOf(TObjectName node) {
17123        // For `t.col`, parser populates objectToken=t, partToken=col.
17124        if (node.getPartToken() != null && node.getObjectToken() != null) {
17125            return node.getObjectToken().toString();
17126        }
17127        return null;
17128    }
17129
17130    /**
17131     * Slice 85 helper — pseudo-table partToken column name for OUTPUT
17132     * INSERTED.col / DELETED.col. Falls back to the bare column name
17133     * when partToken is null (e.g. raw bare reference).
17134     */
17135    private static String partColumnNameOf(TObjectName node, String fallbackColName) {
17136        if (node.getPartToken() != null) {
17137            return node.getPartToken().toString();
17138        }
17139        return fallbackColName;
17140    }
17141
17142    private static void rejectWindowFunctionInScope(gudusoft.gsqlparser.nodes.TParseTreeNode root,
17143                                                    String clauseLabel) {
17144        if (root == null) return;
17145        final boolean[] found = {false};
17146        root.acceptChildren(new TParseTreeVisitor() {
17147            @Override
17148            public void preVisit(TFunctionCall fn) {
17149                if (found[0]) return;
17150                if (fn.getWindowDef() != null) found[0] = true;
17151            }
17152        });
17153        if (found[0]) {
17154            throw new SemanticIRBuildException(
17155                    Diagnostic.error(DiagnosticCode.CLAUSE_WINDOW_FUNCTION_LEAK,
17156                    clauseLabel + " contains a window function (OVER (...)); "
17157                            + "window functions are not allowed in " + clauseLabel
17158                            + " per standard SQL", root));
17159        }
17160    }
17161
17162    private static String effectiveOutputName(TResultColumn rc) {
17163        String alias = rc.getColumnAlias();
17164        if (alias != null && !alias.isEmpty()) {
17165            return alias;
17166        }
17167        String colName = rc.getColumnNameOnly();
17168        if (colName != null && !colName.isEmpty()) {
17169            return colName;
17170        }
17171        throw new SemanticIRBuildException(
17172                Diagnostic.error(DiagnosticCode.RESULT_COLUMN_NO_NAME,
17173                "result column " + rc + " has neither alias nor column name", rc));
17174    }
17175
17176    private static List<ColumnRef> buildFilterColumnRefs(TSelectSqlStatement select,
17177                                                         NameBindingProvider provider,
17178                                                         boolean allowPredicateSubqueries,
17179                                                         List<StatementGraph> stmtsForExtraction,
17180                                                         List<LineageEdge> lineageForExtraction,
17181                                                         Map<String, Integer> cteMapForExtraction,
17182                                                         PredicateClauseContext whereClauseContext) {
17183        TWhereClause where = select.getWhereClause();
17184        if (where == null || where.getCondition() == null) {
17185            return new ArrayList<>();
17186        }
17187        Set<TExpression> extractedWhereRoots =
17188                Collections.<TExpression>emptySet();
17189        if (containsAnySubquery(where)) {
17190            if (!allowPredicateSubqueries) {
17191                // Slice 112 — non-outer SELECTs (FROM-subquery, scalar
17192                // projection subquery body, predicate body) keep the
17193                // slice-80 blanket reject. The outermost SELECT path
17194                // (slice 112) and set-op branch path (slice 113) thread
17195                // {@code allowPredicateSubqueries=true} plus the live
17196                // extraction context so the slice-23+ walker can lift
17197                // uncorrelated predicate-subquery wrappers. Inner
17198                // contexts also have earlier preflight rejecters
17199                // ({@code rejectSubqueriesInFromSubqueryBodyClauses} for
17200                // FROM-subquery bodies, {@code rejectSubqueriesInPredicateBodyClauses}
17201                // for slice-23 predicate bodies); this remains the
17202                // fallback path for any unanticipated nested SELECTs
17203                // that bypass those preflights.
17204                throw new SemanticIRBuildException(
17205                        Diagnostic.error(DiagnosticCode.WHERE_HAS_SUBQUERY_NOT_SUPPORTED,
17206                        "WHERE clause contains a subquery; subqueries in WHERE "
17207                                + "are not supported yet in nested SELECTs",
17208                        select));
17209            }
17210            // Slice 112 / 113 — outer SELECT WHERE and set-op branch
17211            // WHERE lift the slice-80 blanket subquery reject by
17212            // routing uncorrelated predicate-subquery wrappers
17213            // (IN-SELECT / EXISTS / NOT EXISTS / scalar comparison /
17214            // ANY-ALL-SOME) through the slice-23+ JOIN-ON extraction
17215            // pipeline refactored by slice 110 to take a
17216            // PredicateClauseContext. Slice 112 added the SELECT_WHERE
17217            // constant for outer SELECT WHERE; slice 113 adds the
17218            // SET_OP_BRANCH_WHERE constant for nested set-op branch
17219            // WHERE — both reuse the same SELECT_WHERE_* DiagnosticCode
17220            // family (a branch IS a SELECT, only nested) and differ
17221            // only in the {@code clauseLabel} for diagnostic messages.
17222            // Each extracted wrapper lands as its own
17223            // <predicate_subquery_<i>> StatementGraph BEFORE the host
17224            // outer SELECT or set-op branch in {@code stmts} (selectIdx
17225            // = stmts.size() naturally accounts for them — slice-83
17226            // dynamic-index pattern, slice 110/111 precedent).
17227            //
17228            // Remaining non-subquery refs flow into filterColumnRefs
17229            // via collectColumnRefsSkipping. Window functions in
17230            // non-subquery subtrees still reject via
17231            // rejectWindowFunctionInScopeSkipping. The {@code provider}
17232            // already carries withCteContext / withInScopeRelationColumns
17233            // from the outer build chain (slice-65 withUsingScope
17234            // preserves both facets), so the predicate body's inner
17235            // FROM cte routes through RelationKind.CTE and the body's
17236            // own lineage edge becomes STATEMENT_OUTPUT(predicateIdx,
17237            // col) -> STATEMENT_OUTPUT(cteIdx, col) instead of
17238            // TABLE_COLUMN (slice 110/111 precedent).
17239            extractedWhereRoots =
17240                    extractUncorrelatedPredicateSubqueriesFromClause(
17241                            where.getCondition(), provider,
17242                            stmtsForExtraction, lineageForExtraction,
17243                            cteMapForExtraction,
17244                            whereClauseContext);
17245            rejectAnyRemainingSubqueriesFromClause(
17246                    where.getCondition(), extractedWhereRoots,
17247                    whereClauseContext);
17248        }
17249        // Slice 13: reject window functions in WHERE before
17250        // collectColumnRefs descends into OVER (...) and leaks
17251        // PARTITION BY / OVER ORDER BY refs into filterColumnRefs.
17252        // Slice 112 — skip extracted predicate-subquery subtrees so
17253        // inner window functions do not leak into the outer reject
17254        // (mirrors the slice-110/111 UPDATE/DELETE WHERE behaviour).
17255        rejectWindowFunctionInScopeSkipping(where, "WHERE clause",
17256                extractedWhereRoots);
17257        return collectColumnRefsSkipping(where, provider, extractedWhereRoots);
17258    }
17259
17260    /**
17261     * Slice 65 — shared visitor body that emits either the merged-key
17262     * source list (when {@code node} is an unqualified reference to a
17263     * USING merged key in the current SELECT's
17264     * {@link UsingScope}) or the resolver2-bound {@link ColumnRef}.
17265     *
17266     * <p>Used by every visitor that walks expression subtrees and
17267     * collects column refs ({@link #collectColumnRefs},
17268     * {@link #collectColumnRefsSkippingExtended}, the derived FILTER /
17269     * WITHIN-GROUP-excluding variants). Each visitor remains
17270     * responsible for its own skip-depth and nested-SELECT-depth
17271     * tracking; only the column-emit body is shared.
17272     *
17273     * <p>Behavior:
17274     * <ul>
17275     *   <li>If the node is not a column, name is null/empty/star → no-op.</li>
17276     *   <li>If the node is unqualified AND its name matches a USING
17277     *       key in {@code provider.getUsingScope()} AND that scope
17278     *       reports the reference as ambiguous (two disconnected
17279     *       classes, or a catalog-proven out-of-class same-named
17280     *       relation) → throw {@link SemanticIRBuildException}.</li>
17281     *   <li>Otherwise if the unqualified name matches a USING key
17282     *       unambiguously → emit each {@link ColumnRef} from the
17283     *       merged source list (FROM-ordered, deduped per relation).</li>
17284     *   <li>Otherwise → delegate to
17285     *       {@link NameBindingProvider#bindColumn} and emit the bound
17286     *       {@link ColumnRef}; any non-EXACT_MATCH binding records a
17287     *       reject (caller throws after collecting all rejects).</li>
17288     * </ul>
17289     *
17290     * <p>The qualifier check is the SQL-written prefix
17291     * ({@link TObjectName#getTableString()}); when present the
17292     * merged-key path is skipped so {@code a.k} continues to resolve
17293     * to {@code (a, k)} regardless of {@code k}'s USING-key status.
17294     */
17295    private static void appendMergedOrBoundColumnRef(
17296            TObjectName node,
17297            NameBindingProvider provider,
17298            LinkedHashSet<ColumnRef> refsOut,
17299            List<String> rejectsOut) {
17300        if (node.getDbObjectType() != EDbObjectType.column) return;
17301        String name = node.getColumnNameOnly();
17302        if (name == null || "*".equals(name)) return;
17303        UsingScope scope = provider.getUsingScope();
17304        String qualifier = node.getTableString();
17305        if ((qualifier == null || qualifier.isEmpty()) && scope.has(name)) {
17306            if (scope.isAmbiguous(name)) {
17307                throw new SemanticIRBuildException(
17308                        Diagnostic.error(DiagnosticCode.UNQUALIFIED_COLUMN_AMBIGUOUS,
17309                        "unqualified reference to '" + name + "' is ambiguous: "
17310                                + scope.ambiguityReason(name)
17311                                + "; qualify with a table alias", null));
17312            }
17313            for (ColumnRef ref : scope.mergedSourcesFor(name)) {
17314                refsOut.add(ref);
17315            }
17316            return;
17317        }
17318        ColumnBinding binding = provider.bindColumn(node);
17319        if (binding == null) {
17320            rejectsOut.add(node + "[no binding]");
17321            return;
17322        }
17323        if (binding.getStatus() != ResolutionStatus.EXACT_MATCH) {
17324            rejectsOut.add(node + "[" + binding.getStatus() + "]");
17325            return;
17326        }
17327        refsOut.add(new ColumnRef(binding.getRelationAlias(), binding.getColumnName()));
17328    }
17329
17330    /**
17331     * Visit every column-typed {@link TObjectName} reachable from the given
17332     * subtree, ask the provider to bind it, and return de-duplicated
17333     * {@link ColumnRef}s. Any non-EXACT_MATCH binding aborts the build.
17334     *
17335     * <p>Slice 65: when the provider carries a non-empty
17336     * {@link UsingScope}, unqualified references that match a USING
17337     * merged key are expanded to the merged source list (one ref per
17338     * relation in the equivalence class) before delegating to
17339     * {@link NameBindingProvider#bindColumn}. See
17340     * {@link #appendMergedOrBoundColumnRef}.
17341     */
17342    private static List<ColumnRef> collectColumnRefs(gudusoft.gsqlparser.nodes.TParseTreeNode root,
17343                                                     final NameBindingProvider provider) {
17344        final LinkedHashSet<ColumnRef> refs = new LinkedHashSet<>();
17345        final List<String> rejects = new ArrayList<>();
17346        root.acceptChildren(new TParseTreeVisitor() {
17347            int nestedSelectDepth = 0;
17348
17349            @Override
17350            public void preVisit(TSelectSqlStatement nested) {
17351                nestedSelectDepth++;
17352            }
17353
17354            @Override
17355            public void postVisit(TSelectSqlStatement nested) {
17356                nestedSelectDepth--;
17357            }
17358
17359            @Override
17360            public void preVisit(TObjectName node) {
17361                if (nestedSelectDepth > 0) return;
17362                appendMergedOrBoundColumnRef(node, provider, refs, rejects);
17363            }
17364        });
17365        if (!rejects.isEmpty()) {
17366            throw new SemanticIRBuildException(Diagnostic.error(DiagnosticCode.COLUMN_BINDING_NON_EXACT, "non-exact column bindings: " + rejects, null));
17367        }
17368        return new ArrayList<>(refs);
17369    }
17370
17371    /**
17372     * Tolerant variant of {@link #collectColumnRefs} for the MySQL
17373     * self-reference DELETE path (slice 92 Codex P1 fix).
17374     *
17375     * <p>The MySQL parser populates {@code stmt.tables} with 3 entries for
17376     * {@code DELETE T1 FROM T1 WHERE id = 1} (target + {@code joins[0]} +
17377     * {@code referenceJoins[0]}). Resolver2's {@code inferredCandidates}
17378     * then sees 3 candidates for any unqualified column ref and marks the
17379     * binding as NOT_FOUND, which {@link #collectColumnRefs} rejects as
17380     * {@code COLUMN_BINDING_NON_EXACT}.
17381     *
17382     * <p>This variant emits EXACT_MATCH bindings verbatim and falls back
17383     * to the SQL-written qualifier (or {@code null} for unqualified refs)
17384     * for non-exact bindings instead of throwing. Subquery children are
17385     * not descended into (matches the strict collector's behaviour).
17386     */
17387    /**
17388     * @param fallbackRelationAlias used when the binding is non-exact and the
17389     *        SQL-written qualifier is absent; for the MySQL self-reference path
17390     *        this is {@code targetQName} so unqualified refs like
17391     *        {@code WHERE id = 1} emit {@code ColumnRef(targetName, "id")}
17392     *        instead of crashing on the non-null constraint on
17393     *        {@link ColumnRef#ColumnRef(String, String)}.
17394     */
17395    private static List<ColumnRef> collectColumnRefsTolerant(
17396            gudusoft.gsqlparser.nodes.TParseTreeNode root,
17397            final NameBindingProvider provider,
17398            final String fallbackRelationAlias) {
17399        return collectColumnRefsTolerant(root, provider, fallbackRelationAlias,
17400                Collections.<TExpression>emptySet());
17401    }
17402
17403    /**
17404     * Slice 111 — variant of the slice-92 tolerant collector that also
17405     * skips any descendants of {@code skipRoots} (extracted predicate
17406     * subquery wrappers). Mirrors the
17407     * {@link #collectColumnRefsSkipping} skipping behavior so DELETE
17408     * WHERE-side IN-SELECT / EXISTS / scalar-comparison wrappers
17409     * extracted by
17410     * {@link #extractUncorrelatedPredicateSubqueriesFromClause} are
17411     * not double-collected as outer filter refs on the MySQL self-ref
17412     * DELETE path. For the non-self-ref DELETE path the
17413     * {@link #collectColumnRefsSkipping} helper handles the same job;
17414     * this helper exists only for the slice-92 path which needs the
17415     * tolerant binding behavior to survive Resolver2's
17416     * NOT_FOUND / NON_EXACT bindings on unqualified self-ref refs.
17417     */
17418    private static List<ColumnRef> collectColumnRefsTolerant(
17419            gudusoft.gsqlparser.nodes.TParseTreeNode root,
17420            final NameBindingProvider provider,
17421            final String fallbackRelationAlias,
17422            final Set<TExpression> skipRoots) {
17423        final LinkedHashSet<ColumnRef> refs = new LinkedHashSet<>();
17424        // Root fast path: if root IS a skipped TExpression subtree, return empty.
17425        if (root instanceof TExpression && skipRoots.contains(root)) {
17426            return new ArrayList<>(refs);
17427        }
17428        root.acceptChildren(new TParseTreeVisitor() {
17429            int nestedSelectDepth = 0;
17430            int skipDepth = 0;
17431
17432            @Override
17433            public void preVisit(TExpression e) {
17434                if (skipRoots.contains(e)) skipDepth++;
17435            }
17436
17437            @Override
17438            public void postVisit(TExpression e) {
17439                if (skipRoots.contains(e) && skipDepth > 0) skipDepth--;
17440            }
17441
17442            @Override
17443            public void preVisit(TSelectSqlStatement nested) {
17444                nestedSelectDepth++;
17445            }
17446
17447            @Override
17448            public void postVisit(TSelectSqlStatement nested) {
17449                nestedSelectDepth--;
17450            }
17451
17452            @Override
17453            public void preVisit(TObjectName node) {
17454                if (skipDepth > 0) return;
17455                if (nestedSelectDepth > 0) return;
17456                if (node.getDbObjectType() != EDbObjectType.column) return;
17457                String name = node.getColumnNameOnly();
17458                if (name == null || "*".equals(name)) return;
17459                ColumnBinding binding = provider.bindColumn(node);
17460                if (binding != null
17461                        && binding.getStatus() == ResolutionStatus.EXACT_MATCH) {
17462                    refs.add(new ColumnRef(
17463                            binding.getRelationAlias(), binding.getColumnName()));
17464                } else {
17465                    // Non-exact or null binding: prefer SQL-written qualifier;
17466                    // fall back to the single delete-target name so the
17467                    // ColumnRef non-null constraint is satisfied.
17468                    String qualifier = node.getTableString();
17469                    String alias = (qualifier != null && !qualifier.isEmpty())
17470                            ? qualifier : fallbackRelationAlias;
17471                    refs.add(new ColumnRef(alias, name));
17472                }
17473            }
17474        });
17475        return new ArrayList<>(refs);
17476    }
17477
17478    /**
17479     * Thrown when the input falls outside current builder scope or a
17480     * binding fails. Slice 67 attached a {@link Diagnostic} to every
17481     * throw site so external callers can pattern-match on
17482     * {@link DiagnosticCode} rather than parsing message text. The
17483     * legacy {@code (String)} constructor was removed in slice 67;
17484     * use one of the {@link Diagnostic#error} factories and the
17485     * {@link #SemanticIRBuildException(Diagnostic)} constructor.
17486     */
17487    public static final class SemanticIRBuildException extends RuntimeException {
17488        private final Diagnostic diagnostic;
17489
17490        public SemanticIRBuildException(Diagnostic diagnostic) {
17491            super(java.util.Objects.requireNonNull(diagnostic, "diagnostic").getMessage());
17492            this.diagnostic = diagnostic;
17493        }
17494
17495        /**
17496         * @return the structured diagnostic for this rejection. Always
17497         *         non-null after slice 67.
17498         */
17499        public Diagnostic getDiagnostic() {
17500            return diagnostic;
17501        }
17502    }
17503}