001package gudusoft.gsqlparser.pp2.island;
002
003import gudusoft.gsqlparser.ETokenType;
004import gudusoft.gsqlparser.pp2.Pp2FormatOptions;
005import gudusoft.gsqlparser.pp2.island.ClauseScopeAnnotator.ClauseScopeResult;
006import gudusoft.gsqlparser.pp2.island.SqlScopeDetector.SqlScope;
007import gudusoft.gsqlparser.pp2.island.SqlScopeDetector.SqlScopeResult;
008import gudusoft.gsqlparser.pp2.token.Pp2Token;
009import gudusoft.gsqlparser.pp2.token.Pp2TokenStream;
010
011import java.util.ArrayDeque;
012import java.util.ArrayList;
013import java.util.Collections;
014import java.util.Comparator;
015import java.util.Deque;
016import java.util.List;
017import java.util.Locale;
018
019/**
020 * Recognises typed {@link IslandScope}s over a {@link Pp2TokenStream}, using the
021 * SQL scope (S20), clause membership (S21), and block scope (S19) results.
022 *
023 * <h2>What it emits</h2>
024 *
025 * <ul>
026 *   <li><b>Clause islands</b> — maximal runs of identical {@link ClausePart} at
027 *       the same SQL level become SELECT / FROM / JOIN / WHERE / GROUP_BY /
028 *       HAVING / ORDER_BY / INTO / INSERT_VALUES / UPDATE_SET islands. A clause
029 *       island whose only content is keyword(s) (no operand) is flagged
030 *       {@link IncompleteReason#MISSING_OPERAND}, and an {@link IslandKind#ERROR_REGION}
031 *       island is emitted over the dangling tokens (length-capped by
032 *       {@link Pp2FormatOptions#maxErrorRegionSize}).</li>
033 *   <li><b>Subquery / CTE islands</b> — each S20 query-paren scope becomes a
034 *       {@link IslandKind#CTE} island (if its {@code (} is preceded by {@code AS})
035 *       or a {@link IslandKind#SUBQUERY} island; an unclosed one is flagged
036 *       {@link IncompleteReason#UNCLOSED_PAREN}.</li>
037 *   <li><b>CASE islands</b> — {@code CASE ... END} spans (iterative, nesting-aware,
038 *       {@code END CASE} qualifier handled); an unterminated CASE is flagged
039 *       {@link IncompleteReason#MISSING_END}.</li>
040 *   <li><b>MERGE islands</b> — a {@code MERGE} statement span (light: keyword to
041 *       statement end).</li>
042 * </ul>
043 *
044 * <p>Islands may nest/overlap; the result is sorted by start index. The
045 * recogniser is iterative (explicit {@link ArrayDeque}) and read-only — it does
046 * not mutate any {@link Pp2Token}.
047 *
048 * <p>Plan reference: §7.3/S22, §7.4/S22.
049 */
050public final class IslandRecognizer {
051
052    /**
053     * Keywords that, immediately after {@code END}, qualify an end of a
054     * construct this recogniser does not track (so {@code END IF} etc. pop
055     * nothing from the CASE/BEGIN stack).
056     */
057    private static final java.util.Set<String> END_INNER_QUALIFIERS;
058    static {
059        java.util.Set<String> q = new java.util.HashSet<String>(java.util.Arrays.asList(
060            "IF", "LOOP", "WHILE", "REPEAT", "FOR"));
061        END_INNER_QUALIFIERS = java.util.Collections.unmodifiableSet(q);
062    }
063
064    // Note on trivia: the Pp2TokenStream produced by Pp2TokenStreamBuilder (S7)
065    // folds whitespace/newlines into precedingBlanks/precedingLinebreaks, so the
066    // stream contains only solid tokens and comments. The "skip comments" helpers
067    // here therefore reach the true next/previous solid token, and runHasOperand
068    // never miscounts whitespace as an operand.
069
070    /**
071     * Recognise islands over {@code stream}.
072     *
073     * @param stream the token stream; must not be null
074     * @param sql    S20 scope result for the same stream; must not be null
075     * @param clause S21 clause result for the same stream; must not be null
076     * @param opts   pp2 options (for {@code maxErrorRegionSize}); must not be null
077     * @return an immutable, start-index-ordered list of islands; never null
078     * @throws NullPointerException if any argument is null
079     */
080    public List<IslandScope> recognize(Pp2TokenStream stream,
081                                       SqlScopeResult sql,
082                                       ClauseScopeResult clause,
083                                       Pp2FormatOptions opts) {
084        if (stream == null) throw new NullPointerException("stream");
085        if (sql == null) throw new NullPointerException("sql");
086        if (clause == null) throw new NullPointerException("clause");
087        if (opts == null) throw new NullPointerException("opts");
088
089        List<IslandScope> islands = new ArrayList<IslandScope>();
090        addClauseIslands(stream, sql, clause, opts, islands);
091        addSubqueryAndCteIslands(stream, sql, islands);
092        addCaseIslands(stream, sql, islands);
093        addMergeIslands(stream, sql, islands);
094
095        Collections.sort(islands, BY_START);
096        return Collections.unmodifiableList(islands);
097    }
098
099    private static final Comparator<IslandScope> BY_START = new Comparator<IslandScope>() {
100        @Override
101        public int compare(IslandScope a, IslandScope b) {
102            int c = Integer.compare(a.getStartIndex(), b.getStartIndex());
103            return c != 0 ? c : Integer.compare(a.getEndIndex(), b.getEndIndex());
104        }
105    };
106
107    // ---- clause islands -------------------------------------------------
108
109    private void addClauseIslands(Pp2TokenStream stream, SqlScopeResult sql,
110                                  ClauseScopeResult clause, Pp2FormatOptions opts,
111                                  List<IslandScope> out) {
112        int n = stream.size();
113        int i = 0;
114        while (i < n) {
115            ClausePart part = clause.partAt(i);
116            IslandKind kind = islandKindFor(part);
117            int level = sql.levelAt(i);
118            if (kind == null) { i++; continue; }
119
120            // Extend the run while clause part and sql level stay the same.
121            int j = i;
122            while (j + 1 < n
123                && clause.partAt(j + 1) == part
124                && sql.levelAt(j + 1) == level) {
125                j++;
126            }
127
128            boolean complete = runHasOperand(stream, i, j);
129            IncompleteReason reason = complete
130                ? IncompleteReason.NONE : IncompleteReason.MISSING_OPERAND;
131            out.add(new IslandScope(kind, i, j, level, complete, reason));
132
133            if (!complete) {
134                int cappedEnd = charCappedEnd(stream, i, j, opts.maxErrorRegionSize);
135                out.add(new IslandScope(IslandKind.ERROR_REGION, i, cappedEnd, level,
136                    false, IncompleteReason.UNRECOVERABLE));
137            }
138            i = j + 1;
139        }
140    }
141
142    /** A clause run is complete if it contains at least one non-comment, non-keyword token. */
143    private static boolean runHasOperand(Pp2TokenStream stream, int start, int end) {
144        for (int k = start; k <= end; k++) {
145            ETokenType type = stream.get(k).getSourceToken().tokentype;
146            if (isComment(type)) continue;
147            if (type != ETokenType.ttkeyword) return true;
148        }
149        return false;
150    }
151
152    private static IslandKind islandKindFor(ClausePart part) {
153        switch (part) {
154            case SELECT_LIST:   return IslandKind.SELECT;
155            case FROM:          return IslandKind.FROM;
156            case JOIN:          return IslandKind.JOIN;
157            case WHERE:         return IslandKind.WHERE;
158            case GROUP_BY:      return IslandKind.GROUP_BY;
159            case HAVING:        return IslandKind.HAVING;
160            case ORDER_BY:      return IslandKind.ORDER_BY;
161            case INTO:          return IslandKind.INTO;
162            case INSERT_VALUES: return IslandKind.INSERT_VALUES;
163            case UPDATE_SET:    return IslandKind.UPDATE_SET;
164            default:            return null; // NONE, SET_OP -> not a clause island
165        }
166    }
167
168    // ---- subquery / CTE islands -----------------------------------------
169
170    private void addSubqueryAndCteIslands(Pp2TokenStream stream, SqlScopeResult sql,
171                                          List<IslandScope> out) {
172        int lastSolid = lastNonTriviaIndex(stream);
173        for (SqlScope sc : sql.getScopes()) {
174            int start = sc.getOpenIndex();
175            int end = sc.isClosed() ? sc.getCloseIndex() : Math.max(start, lastSolid);
176            IslandKind kind = isAsPreceded(stream, start) ? IslandKind.CTE : IslandKind.SUBQUERY;
177            boolean complete = sc.isClosed();
178            IncompleteReason reason = complete
179                ? IncompleteReason.NONE : IncompleteReason.UNCLOSED_PAREN;
180            out.add(new IslandScope(kind, start, end, sc.getEnclosingLevel(), complete, reason));
181        }
182    }
183
184    /** True if the first solid token before {@code openIndex} is the keyword AS. */
185    private static boolean isAsPreceded(Pp2TokenStream stream, int openIndex) {
186        for (int j = openIndex - 1; j >= 0; j--) {
187            ETokenType type = stream.get(j).getSourceToken().tokentype;
188            if (isComment(type)) continue;
189            if (type != ETokenType.ttkeyword) return false;
190            String text = stream.get(j).getText();
191            return text != null && "AS".equalsIgnoreCase(text);
192        }
193        return false;
194    }
195
196    // ---- CASE islands ---------------------------------------------------
197
198    /** Frame kinds on the CASE/BEGIN balancing stack. */
199    private static final int FRAME_CASE = 0;
200    private static final int FRAME_BEGIN = 1;
201
202    private void addCaseIslands(Pp2TokenStream stream, SqlScopeResult sql,
203                                List<IslandScope> out) {
204        int n = stream.size();
205        // Unified stack of open CASE / BEGIN frames so an END pops the nearest
206        // open construct (a BEGIN's END must not close an enclosing CASE).
207        // Each entry is {frameKind, startIndex}.
208        Deque<int[]> stack = new ArrayDeque<int[]>();
209        int skipCaseAt = -1; // index of a CASE that is the qualifier of "END CASE"
210        int lastSolid = lastNonTriviaIndex(stream);
211
212        for (int i = 0; i < n; i++) {
213            if (stream.get(i).getSourceToken().tokentype != ETokenType.ttkeyword) continue;
214            String upper = upperKeyword(stream, i);
215
216            if ("CASE".equals(upper)) {
217                if (i == skipCaseAt) { skipCaseAt = -1; continue; }
218                stack.push(new int[]{FRAME_CASE, i});
219            } else if ("BEGIN".equals(upper)) {
220                stack.push(new int[]{FRAME_BEGIN, i});
221            } else if ("END".equals(upper)) {
222                int next = nextSolidIndex(stream, i);
223                String nextUpper = next < 0 ? null : upperKeyword(stream, next);
224                // END IF / END LOOP / END WHILE / END REPEAT / END FOR close
225                // inner constructs this recogniser does not track; they pop nothing.
226                if (nextUpper != null && END_INNER_QUALIFIERS.contains(nextUpper)) {
227                    continue;
228                }
229                if (stack.isEmpty()) continue;
230                boolean endCase = "CASE".equals(nextUpper); // the "END CASE" terminator
231                int[] frame = stack.pop();
232                if (frame[0] == FRAME_CASE) {
233                    // Include the trailing CASE token of an "END CASE" terminator.
234                    int end = endCase ? next : i;
235                    out.add(new IslandScope(IslandKind.CASE, frame[1], end,
236                        sql.levelAt(frame[1]), true, IncompleteReason.NONE));
237                }
238                // For a BEGIN frame we emit nothing (S19 owns block scopes); we
239                // only needed it to balance the END correctly.
240                if (endCase) skipCaseAt = next; // don't treat the qualifier as a new CASE
241            }
242        }
243        // Unterminated CASE blocks (any BEGIN frames left are S19's concern).
244        while (!stack.isEmpty()) {
245            int[] frame = stack.pop();
246            if (frame[0] == FRAME_CASE) {
247                int end = Math.max(frame[1], lastSolid);
248                out.add(new IslandScope(IslandKind.CASE, frame[1], end,
249                    sql.levelAt(frame[1]), false, IncompleteReason.MISSING_END));
250            }
251        }
252    }
253
254    /** Uppercased text of the keyword token at i (assumes it is a ttkeyword). */
255    private static String upperKeyword(Pp2TokenStream stream, int i) {
256        String text = stream.get(i).getText();
257        return text == null ? "" : text.toUpperCase(Locale.ROOT);
258    }
259
260    // ---- MERGE islands --------------------------------------------------
261
262    private void addMergeIslands(Pp2TokenStream stream, SqlScopeResult sql,
263                                 List<IslandScope> out) {
264        int n = stream.size();
265        for (int i = 0; i < n; i++) {
266            if (!isKeyword(stream, i, "MERGE")) continue;
267            int end = i;
268            for (int j = i + 1; j < n; j++) {
269                if (stream.get(j).getSourceToken().tokentype == ETokenType.ttsemicolon) break;
270                end = j;
271            }
272            out.add(new IslandScope(IslandKind.MERGE, i, end, sql.levelAt(i),
273                true, IncompleteReason.NONE));
274        }
275    }
276
277    // ---- helpers --------------------------------------------------------
278
279    /**
280     * The greatest index {@code e} in {@code [start, end]} such that the summed
281     * source text length of tokens {@code [start, e]} does not exceed
282     * {@code maxChars}. At least {@code start} is always included.
283     */
284    private static int charCappedEnd(Pp2TokenStream stream, int start, int end, int maxChars) {
285        if (maxChars <= 0) return start;
286        int chars = 0;
287        for (int k = start; k <= end; k++) {
288            String text = stream.get(k).getText();
289            chars += (text == null ? 0 : text.length());
290            if (chars > maxChars) return Math.max(start, k - 1);
291        }
292        return end;
293    }
294
295    private static int nextSolidIndex(Pp2TokenStream stream, int from) {
296        for (int j = from + 1; j < stream.size(); j++) {
297            if (!isComment(stream.get(j).getSourceToken().tokentype)) return j;
298        }
299        return -1;
300    }
301
302    private static int lastNonTriviaIndex(Pp2TokenStream stream) {
303        for (int j = stream.size() - 1; j >= 0; j--) {
304            if (!isComment(stream.get(j).getSourceToken().tokentype)) return j;
305        }
306        return Math.max(0, stream.size() - 1);
307    }
308
309    private static boolean isKeyword(Pp2TokenStream stream, int i, String upper) {
310        if (i < 0 || i >= stream.size()) return false;
311        Pp2Token t = stream.get(i);
312        if (t.getSourceToken().tokentype != ETokenType.ttkeyword) return false;
313        String text = t.getText();
314        return text != null && text.toUpperCase(Locale.ROOT).equals(upper);
315    }
316
317    private static boolean isComment(ETokenType type) {
318        return type == ETokenType.ttsimplecomment
319            || type == ETokenType.ttbracketedcomment
320            || type == ETokenType.ttCPPComment;
321    }
322}