001package gudusoft.gsqlparser.pp2.overlay;
002
003import gudusoft.gsqlparser.ESetOperatorType;
004import gudusoft.gsqlparser.TCustomSqlStatement;
005import gudusoft.gsqlparser.TSourceToken;
006import gudusoft.gsqlparser.nodes.TParseTreeNode;
007import gudusoft.gsqlparser.nodes.TResultColumn;
008import gudusoft.gsqlparser.nodes.TResultColumnList;
009import gudusoft.gsqlparser.nodes.TTable;
010import gudusoft.gsqlparser.nodes.TTableList;
011import gudusoft.gsqlparser.nodes.TWhereClause;
012import gudusoft.gsqlparser.pp2.token.Pp2Token;
013import gudusoft.gsqlparser.pp2.token.Pp2TokenStream;
014import gudusoft.gsqlparser.pp2.token.TokenRole;
015import gudusoft.gsqlparser.stmt.TSelectSqlStatement;
016
017import java.util.ArrayDeque;
018import java.util.Deque;
019import java.util.TreeMap;
020
021/**
022 * AST overlay annotator — the v3 evolution bridge (plan §5.4, §7.3/S33).
023 *
024 * <p>Reads a parsed {@link TCustomSqlStatement} and stamps AST-derived
025 * {@link TokenRole}s ({@code AST_SELECT_LIST_ITEM}, {@code AST_TABLE_REF},
026 * {@code AST_WHERE_CONDITION}) onto the matching {@link Pp2Token}s in a
027 * {@link Pp2TokenStream}. The wrapped {@link TSourceToken} is never mutated;
028 * only the {@code Pp2Token.roles} side-channel is written (forbidden #6
029 * explicitly permits designated annotator stages to write roles).
030 *
031 * <p><b>Feature flag.</b> This annotator is <b>off by default</b> in v2
032 * ({@code Pp2FormatOptions.astOverlayEnabled == false}). When the flag is on,
033 * {@code Pp2Engine} invokes it for each cleanly-parsed (AST_OK) region. v2
034 * does not yet render from these roles — that is v3's
035 * {@code AstOverlayRenderer}. Landing the annotation infrastructure now means
036 * v3 is a flag flip plus a renderer add, not a re-architecture.
037 *
038 * <h2>Offset-alignment contract</h2>
039 *
040 * <p>The {@code stream} passed to {@link #annotate} MUST be built from the
041 * <i>same parse</i> (same {@code TSourceTokenList}) as {@code stmt}'s AST, so
042 * the AST node start/end token offsets line up with the stream tokens'
043 * {@code TSourceToken.offset}. {@code Pp2Engine} guarantees this by building
044 * the region stream from the region parser's own token list; the standalone
045 * tests build the stream from the same {@code TGSqlParser} that produced the
046 * statement.
047 *
048 * <h2>Iterative traversal (CLAUDE.md mandate, plan R18)</h2>
049 *
050 * <p>UNION / INTERSECT / EXCEPT statements form a deep left-leaning
051 * {@code leftStmt}/{@code rightStmt} tree from the left-recursive grammar. A
052 * recursive descent would {@code StackOverflowError} on 2000-deep set
053 * operations. This annotator walks that tree with an explicit
054 * {@link ArrayDeque}. Within each leaf SELECT, the result-column list and
055 * table list are bounded in width (not depth), so a flat iteration over them
056 * is safe. The annotator never descends the WHERE expression tree — it marks
057 * the flat token span of the {@link TWhereClause} node only, so deep AND/OR
058 * expression trees cannot trigger recursion here either.
059 */
060public final class AstOverlayAnnotator {
061
062    /**
063     * Annotate {@code stream} with AST-derived roles from {@code stmt}, where
064     * the stream and the AST come from the same parse (offset adjustment 0).
065     *
066     * @param stmt   a parsed statement root; may be null (no-op, returns 0)
067     * @param stream the token stream built from the same parse; may be null
068     *               (no-op, returns 0)
069     * @return the number of {@code (token, role)} annotations applied (a token
070     *         touched by two roles counts twice). Useful for tests and for a
071     *         v3 caller to know whether the overlay produced anything.
072     */
073    public int annotate(TCustomSqlStatement stmt, Pp2TokenStream stream) {
074        return annotate(stmt, stream, 0L);
075    }
076
077    /**
078     * Annotate {@code stream} with AST-derived roles from {@code stmt}, applying
079     * a fixed {@code offsetAdjustment} to every AST node span before matching it
080     * against the stream tokens' absolute offsets.
081     *
082     * <p>This is the engine wiring seam: {@code Pp2Engine} parses each region
083     * from {@code originalSql.substring(range.getStartOffset(), ...)}, so the
084     * region AST's token offsets are relative to the region substring. Passing
085     * {@code offsetAdjustment = range.getStartOffset()} translates them onto the
086     * engine's whole-script stream so the roles land on the exact tokens the
087     * renderers see. (The same-parse overload passes 0.)
088     *
089     * @param stmt             parsed statement root; may be null (no-op)
090     * @param stream           target stream (absolute offsets); may be null (no-op)
091     * @param offsetAdjustment added to each AST node's start/end offset before
092     *                         matching against {@code stream} token offsets
093     * @return number of {@code (token, role)} annotations applied
094     */
095    public int annotate(TCustomSqlStatement stmt, Pp2TokenStream stream,
096                        long offsetAdjustment) {
097        if (stmt == null || stream == null || stream.isEmpty()) {
098            return 0;
099        }
100
101        // Index stream tokens by their source offset once. TreeMap gives an
102        // O(log n + k) range scan per AST node span via subMap.
103        TreeMap<Long, Pp2Token> byOffset = new TreeMap<Long, Pp2Token>();
104        for (Pp2Token t : stream) {
105            TSourceToken st = t.getSourceToken();
106            if (st != null) {
107                // First token wins on a duplicate offset (shouldn't happen for
108                // solid tokens from one parse).
109                if (!byOffset.containsKey(st.offset)) {
110                    byOffset.put(st.offset, t);
111                }
112            }
113        }
114        if (byOffset.isEmpty()) {
115            return 0;
116        }
117
118        int annotated = 0;
119
120        // Iterative walk of the (possibly very deep) set-operator tree.
121        Deque<TCustomSqlStatement> stack = new ArrayDeque<TCustomSqlStatement>();
122        stack.push(stmt);
123        while (!stack.isEmpty()) {
124            TCustomSqlStatement cur = stack.pop();
125            if (cur == null) {
126                continue;
127            }
128            if (cur instanceof TSelectSqlStatement) {
129                TSelectSqlStatement sel = (TSelectSqlStatement) cur;
130                if (sel.getSetOperatorType() != ESetOperatorType.none) {
131                    // Combined query: push children, do not annotate this node
132                    // directly — its operands are the leaf SELECTs.
133                    if (sel.getRightStmt() != null) {
134                        stack.push(sel.getRightStmt());
135                    }
136                    if (sel.getLeftStmt() != null) {
137                        stack.push(sel.getLeftStmt());
138                    }
139                    continue;
140                }
141                annotated += annotateSelectColumns(sel, byOffset, offsetAdjustment);
142            }
143            // Tables and WHERE live on the base TCustomSqlStatement, so they are
144            // annotated for any leaf statement kind (SELECT, UPDATE, DELETE...).
145            annotated += annotateTables(cur, byOffset, offsetAdjustment);
146            annotated += annotateWhere(cur, byOffset, offsetAdjustment);
147        }
148        return annotated;
149    }
150
151    private int annotateSelectColumns(TSelectSqlStatement sel,
152                                      TreeMap<Long, Pp2Token> byOffset, long adj) {
153        TResultColumnList cols = sel.getResultColumnList();
154        if (cols == null) {
155            return 0;
156        }
157        int n = 0;
158        for (TResultColumn col : cols) {
159            n += stampSpan(col, byOffset, TokenRole.AST_SELECT_LIST_ITEM, adj);
160        }
161        return n;
162    }
163
164    private int annotateTables(TCustomSqlStatement cur,
165                               TreeMap<Long, Pp2Token> byOffset, long adj) {
166        TTableList tables = cur.getTables();
167        if (tables == null) {
168            return 0;
169        }
170        int n = 0;
171        for (TTable table : tables) {
172            n += stampSpan(table, byOffset, TokenRole.AST_TABLE_REF, adj);
173        }
174        return n;
175    }
176
177    private int annotateWhere(TCustomSqlStatement cur,
178                              TreeMap<Long, Pp2Token> byOffset, long adj) {
179        TWhereClause where = cur.getWhereClause();
180        if (where == null) {
181            return 0;
182        }
183        // Span only — never descends the (possibly 2000-deep) AND/OR tree.
184        return stampSpan(where, byOffset, TokenRole.AST_WHERE_CONDITION, adj);
185    }
186
187    /**
188     * Stamp {@code role} on every stream token whose source offset falls in the
189     * inclusive span of {@code node}'s start/end tokens (shifted by {@code adj}).
190     * Returns the count of tokens stamped.
191     */
192    private int stampSpan(TParseTreeNode node, TreeMap<Long, Pp2Token> byOffset,
193                          TokenRole role, long adj) {
194        if (node == null) {
195            return 0;
196        }
197        TSourceToken start = node.getStartToken();
198        TSourceToken end = node.getEndToken();
199        if (start == null || end == null) {
200            return 0;
201        }
202        long lo = Math.min(start.offset, end.offset) + adj;
203        long hi = Math.max(start.offset, end.offset) + adj;
204        int n = 0;
205        for (Pp2Token t : byOffset.subMap(lo, true, hi, true).values()) {
206            if (!t.hasRole(role)) {
207                t.addRole(role);
208                n++;
209            }
210        }
211        return n;
212    }
213}