001package gudusoft.gsqlparser.pp2.island;
002
003import gudusoft.gsqlparser.ETokenType;
004import gudusoft.gsqlparser.TSourceToken;
005import gudusoft.gsqlparser.pp2.token.Pp2Token;
006import gudusoft.gsqlparser.pp2.token.Pp2TokenStream;
007
008import java.util.ArrayDeque;
009import java.util.ArrayList;
010import java.util.Collections;
011import java.util.Deque;
012import java.util.List;
013import java.util.Locale;
014
015/**
016 * Computes, for every token in a {@link Pp2TokenStream}, its SQL query-scope
017 * level: how many enclosing subqueries / CTE bodies contain it.
018 *
019 * <h2>Scope model</h2>
020 *
021 * <p>A token's SQL scope level is the number of enclosing <i>query parens</i>.
022 * A query paren is a {@code (} whose next solid (non-comment) token is
023 * {@code SELECT} or {@code WITH} — i.e. a parenthesised subquery or CTE body.
024 * Top-level query tokens are level 0; tokens inside one subquery are level 1;
025 * doubly-nested are level 2; and so on.
026 *
027 * <p>This makes the model robust and well-defined lexically:
028 * <ul>
029 *   <li>{@code SELECT * FROM (SELECT * FROM (SELECT 1))} → levels 0, 1, 2.</li>
030 *   <li>{@code SELECT 1 UNION SELECT 2} → all level 0 (set-operation siblings
031 *       share a scope; no extra query paren is introduced).</li>
032 *   <li>{@code WITH x AS (SELECT 1) SELECT * FROM x} → the CTE body is level 1,
033 *       the outer query level 0.</li>
034 *   <li>{@code SELECT (1 + 2) FROM t} → all level 0 (a grouping paren is not a
035 *       query paren — its next solid token is not SELECT/WITH).</li>
036 * </ul>
037 *
038 * <h2>Iterative, read-only</h2>
039 *
040 * <p>Traversal uses an explicit {@link ArrayDeque} of paren frames — never
041 * recursion — so a 2000-deep nested-subquery input cannot
042 * {@code StackOverflowError} (CLAUDE.md iterative-traversal mandate; plan
043 * §10.1, §13/R3). The detector does not mutate any {@link Pp2Token}, its roles,
044 * or the wrapped {@link TSourceToken}; it returns a {@link SqlScopeResult} side
045 * structure (consistent with S18/S19).
046 *
047 * <p>Plan reference: §7.3/S20, §7.4/S20, §10.1, §13/R3.
048 */
049public final class SqlScopeDetector {
050
051    /**
052     * Annotate the SQL scope level of every token in {@code stream}.
053     *
054     * @param stream the token stream; must not be null
055     * @return a {@link SqlScopeResult} indexed by token position; never null
056     * @throws NullPointerException if {@code stream} is null
057     */
058    public SqlScopeResult detect(Pp2TokenStream stream) {
059        if (stream == null) throw new NullPointerException("stream");
060
061        int n = stream.size();
062        int[] level = new int[n];
063
064        Deque<ParenFrame> stack = new ArrayDeque<ParenFrame>();
065        List<SqlScope> scopes = new ArrayList<SqlScope>();
066        int queryDepth = 0;
067        int maxLevel = 0;
068
069        for (int i = 0; i < n; i++) {
070            Pp2Token t = stream.get(i);
071            ETokenType type = t.getSourceToken().tokentype;
072
073            if (type == ETokenType.ttleftparenthesis) {
074                // Opener belongs to the enclosing scope; descend afterwards.
075                level[i] = queryDepth;
076                boolean isQuery = nextSolidIsQueryIntroducer(stream, i);
077                stack.push(new ParenFrame(isQuery, i, queryDepth));
078                if (isQuery) queryDepth++;
079            } else if (type == ETokenType.ttrightparenthesis) {
080                // Closer belongs to the enclosing scope: pop first (if matching).
081                if (!stack.isEmpty()) {
082                    ParenFrame f = stack.pop();
083                    if (f.isQuery) {
084                        queryDepth--;
085                        scopes.add(new SqlScope(f.openIndex, i, f.enclosingLevel));
086                    }
087                }
088                level[i] = queryDepth;
089            } else {
090                level[i] = queryDepth;
091            }
092
093            if (level[i] > maxLevel) maxLevel = level[i];
094        }
095
096        // Unclosed query parens are still recorded (closeIndex -1).
097        while (!stack.isEmpty()) {
098            ParenFrame f = stack.pop();
099            if (f.isQuery) {
100                scopes.add(new SqlScope(f.openIndex, -1, f.enclosingLevel));
101            }
102        }
103
104        return new SqlScopeResult(level, maxLevel, Collections.unmodifiableList(scopes));
105    }
106
107    /** True if the next solid (non-comment) token after index i is SELECT or WITH. */
108    private static boolean nextSolidIsQueryIntroducer(Pp2TokenStream stream, int i) {
109        for (int j = i + 1; j < stream.size(); j++) {
110            ETokenType type = stream.get(j).getSourceToken().tokentype;
111            if (isComment(type)) continue;
112            if (type != ETokenType.ttkeyword) return false;
113            String text = stream.get(j).getText();
114            if (text == null) return false;
115            String upper = text.toUpperCase(Locale.ROOT);
116            return "SELECT".equals(upper) || "WITH".equals(upper);
117        }
118        return false;
119    }
120
121    private static boolean isComment(ETokenType type) {
122        return type == ETokenType.ttsimplecomment
123            || type == ETokenType.ttbracketedcomment
124            || type == ETokenType.ttCPPComment;
125    }
126
127    /** A single open paren frame on the explicit traversal stack. */
128    private static final class ParenFrame {
129        final boolean isQuery;
130        final int openIndex;
131        final int enclosingLevel;
132        ParenFrame(boolean isQuery, int openIndex, int enclosingLevel) {
133            this.isQuery = isQuery;
134            this.openIndex = openIndex;
135            this.enclosingLevel = enclosingLevel;
136        }
137    }
138
139    /** Per-token SQL scope level plus the list of detected subquery/CTE scopes. */
140    public static final class SqlScopeResult {
141        private final int[] level;
142        private final int maxLevel;
143        private final List<SqlScope> scopes;
144
145        SqlScopeResult(int[] level, int maxLevel, List<SqlScope> scopes) {
146            this.level = level;
147            this.maxLevel = maxLevel;
148            this.scopes = scopes;
149        }
150
151        /** Number of tokens covered. */
152        public int size() { return level.length; }
153
154        /** SQL scope level of the token at {@code index} (0 = top-level query). */
155        public int levelAt(int index) { return level[index]; }
156
157        /** Maximum scope level observed (0 if no subqueries). */
158        public int getMaxLevel() { return maxLevel; }
159
160        /**
161         * All detected subquery / CTE-body scopes, each carrying the opening
162         * paren index, the closing paren index ({@code -1} if unclosed), and
163         * the enclosing level at which the subquery sits.
164         */
165        public List<SqlScope> getScopes() { return scopes; }
166    }
167
168    /** An immutable subquery/CTE scope (a query paren and its contents). */
169    public static final class SqlScope {
170        private final int openIndex;
171        private final int closeIndex;   // -1 if unclosed
172        private final int enclosingLevel;
173
174        SqlScope(int openIndex, int closeIndex, int enclosingLevel) {
175            this.openIndex = openIndex;
176            this.closeIndex = closeIndex;
177            this.enclosingLevel = enclosingLevel;
178        }
179
180        public int getOpenIndex() { return openIndex; }
181        public int getCloseIndex() { return closeIndex; }
182        public boolean isClosed() { return closeIndex >= 0; }
183        public int getEnclosingLevel() { return enclosingLevel; }
184
185        @Override
186        public String toString() {
187            return "SqlScope[open=" + openIndex + " close=" + closeIndex
188                + " enclosingLevel=" + enclosingLevel + "]";
189        }
190    }
191}