001package gudusoft.gsqlparser.pp2.island; 002 003import gudusoft.gsqlparser.ETokenType; 004import gudusoft.gsqlparser.pp2.Pp2FormatOptions; 005import gudusoft.gsqlparser.pp2.island.ClauseScopeAnnotator.ClauseScopeResult; 006import gudusoft.gsqlparser.pp2.island.SqlScopeDetector.SqlScope; 007import gudusoft.gsqlparser.pp2.island.SqlScopeDetector.SqlScopeResult; 008import gudusoft.gsqlparser.pp2.token.Pp2Token; 009import gudusoft.gsqlparser.pp2.token.Pp2TokenStream; 010 011import java.util.ArrayDeque; 012import java.util.ArrayList; 013import java.util.Collections; 014import java.util.Comparator; 015import java.util.Deque; 016import java.util.List; 017import java.util.Locale; 018 019/** 020 * Recognises typed {@link IslandScope}s over a {@link Pp2TokenStream}, using the 021 * SQL scope (S20), clause membership (S21), and block scope (S19) results. 022 * 023 * <h2>What it emits</h2> 024 * 025 * <ul> 026 * <li><b>Clause islands</b> — maximal runs of identical {@link ClausePart} at 027 * the same SQL level become SELECT / FROM / JOIN / WHERE / GROUP_BY / 028 * HAVING / ORDER_BY / INTO / INSERT_VALUES / UPDATE_SET islands. A clause 029 * island whose only content is keyword(s) (no operand) is flagged 030 * {@link IncompleteReason#MISSING_OPERAND}, and an {@link IslandKind#ERROR_REGION} 031 * island is emitted over the dangling tokens (length-capped by 032 * {@link Pp2FormatOptions#maxErrorRegionSize}).</li> 033 * <li><b>Subquery / CTE islands</b> — each S20 query-paren scope becomes a 034 * {@link IslandKind#CTE} island (if its {@code (} is preceded by {@code AS}) 035 * or a {@link IslandKind#SUBQUERY} island; an unclosed one is flagged 036 * {@link IncompleteReason#UNCLOSED_PAREN}.</li> 037 * <li><b>CASE islands</b> — {@code CASE ... END} spans (iterative, nesting-aware, 038 * {@code END CASE} qualifier handled); an unterminated CASE is flagged 039 * {@link IncompleteReason#MISSING_END}.</li> 040 * <li><b>MERGE islands</b> — a {@code MERGE} statement span (light: keyword to 041 * statement end).</li> 042 * </ul> 043 * 044 * <p>Islands may nest/overlap; the result is sorted by start index. The 045 * recogniser is iterative (explicit {@link ArrayDeque}) and read-only — it does 046 * not mutate any {@link Pp2Token}. 047 * 048 * <p>Plan reference: §7.3/S22, §7.4/S22. 049 */ 050public final class IslandRecognizer { 051 052 /** 053 * Keywords that, immediately after {@code END}, qualify an end of a 054 * construct this recogniser does not track (so {@code END IF} etc. pop 055 * nothing from the CASE/BEGIN stack). 056 */ 057 private static final java.util.Set<String> END_INNER_QUALIFIERS; 058 static { 059 java.util.Set<String> q = new java.util.HashSet<String>(java.util.Arrays.asList( 060 "IF", "LOOP", "WHILE", "REPEAT", "FOR")); 061 END_INNER_QUALIFIERS = java.util.Collections.unmodifiableSet(q); 062 } 063 064 // Note on trivia: the Pp2TokenStream produced by Pp2TokenStreamBuilder (S7) 065 // folds whitespace/newlines into precedingBlanks/precedingLinebreaks, so the 066 // stream contains only solid tokens and comments. The "skip comments" helpers 067 // here therefore reach the true next/previous solid token, and runHasOperand 068 // never miscounts whitespace as an operand. 069 070 /** 071 * Recognise islands over {@code stream}. 072 * 073 * @param stream the token stream; must not be null 074 * @param sql S20 scope result for the same stream; must not be null 075 * @param clause S21 clause result for the same stream; must not be null 076 * @param opts pp2 options (for {@code maxErrorRegionSize}); must not be null 077 * @return an immutable, start-index-ordered list of islands; never null 078 * @throws NullPointerException if any argument is null 079 */ 080 public List<IslandScope> recognize(Pp2TokenStream stream, 081 SqlScopeResult sql, 082 ClauseScopeResult clause, 083 Pp2FormatOptions opts) { 084 if (stream == null) throw new NullPointerException("stream"); 085 if (sql == null) throw new NullPointerException("sql"); 086 if (clause == null) throw new NullPointerException("clause"); 087 if (opts == null) throw new NullPointerException("opts"); 088 089 List<IslandScope> islands = new ArrayList<IslandScope>(); 090 addClauseIslands(stream, sql, clause, opts, islands); 091 addSubqueryAndCteIslands(stream, sql, islands); 092 addCaseIslands(stream, sql, islands); 093 addMergeIslands(stream, sql, islands); 094 095 Collections.sort(islands, BY_START); 096 return Collections.unmodifiableList(islands); 097 } 098 099 private static final Comparator<IslandScope> BY_START = new Comparator<IslandScope>() { 100 @Override 101 public int compare(IslandScope a, IslandScope b) { 102 int c = Integer.compare(a.getStartIndex(), b.getStartIndex()); 103 return c != 0 ? c : Integer.compare(a.getEndIndex(), b.getEndIndex()); 104 } 105 }; 106 107 // ---- clause islands ------------------------------------------------- 108 109 private void addClauseIslands(Pp2TokenStream stream, SqlScopeResult sql, 110 ClauseScopeResult clause, Pp2FormatOptions opts, 111 List<IslandScope> out) { 112 int n = stream.size(); 113 int i = 0; 114 while (i < n) { 115 ClausePart part = clause.partAt(i); 116 IslandKind kind = islandKindFor(part); 117 int level = sql.levelAt(i); 118 if (kind == null) { i++; continue; } 119 120 // Extend the run while clause part and sql level stay the same. 121 int j = i; 122 while (j + 1 < n 123 && clause.partAt(j + 1) == part 124 && sql.levelAt(j + 1) == level) { 125 j++; 126 } 127 128 boolean complete = runHasOperand(stream, i, j); 129 IncompleteReason reason = complete 130 ? IncompleteReason.NONE : IncompleteReason.MISSING_OPERAND; 131 out.add(new IslandScope(kind, i, j, level, complete, reason)); 132 133 if (!complete) { 134 int cappedEnd = charCappedEnd(stream, i, j, opts.maxErrorRegionSize); 135 out.add(new IslandScope(IslandKind.ERROR_REGION, i, cappedEnd, level, 136 false, IncompleteReason.UNRECOVERABLE)); 137 } 138 i = j + 1; 139 } 140 } 141 142 /** A clause run is complete if it contains at least one non-comment, non-keyword token. */ 143 private static boolean runHasOperand(Pp2TokenStream stream, int start, int end) { 144 for (int k = start; k <= end; k++) { 145 ETokenType type = stream.get(k).getSourceToken().tokentype; 146 if (isComment(type)) continue; 147 if (type != ETokenType.ttkeyword) return true; 148 } 149 return false; 150 } 151 152 private static IslandKind islandKindFor(ClausePart part) { 153 switch (part) { 154 case SELECT_LIST: return IslandKind.SELECT; 155 case FROM: return IslandKind.FROM; 156 case JOIN: return IslandKind.JOIN; 157 case WHERE: return IslandKind.WHERE; 158 case GROUP_BY: return IslandKind.GROUP_BY; 159 case HAVING: return IslandKind.HAVING; 160 case ORDER_BY: return IslandKind.ORDER_BY; 161 case INTO: return IslandKind.INTO; 162 case INSERT_VALUES: return IslandKind.INSERT_VALUES; 163 case UPDATE_SET: return IslandKind.UPDATE_SET; 164 default: return null; // NONE, SET_OP -> not a clause island 165 } 166 } 167 168 // ---- subquery / CTE islands ----------------------------------------- 169 170 private void addSubqueryAndCteIslands(Pp2TokenStream stream, SqlScopeResult sql, 171 List<IslandScope> out) { 172 int lastSolid = lastNonTriviaIndex(stream); 173 for (SqlScope sc : sql.getScopes()) { 174 int start = sc.getOpenIndex(); 175 int end = sc.isClosed() ? sc.getCloseIndex() : Math.max(start, lastSolid); 176 IslandKind kind = isAsPreceded(stream, start) ? IslandKind.CTE : IslandKind.SUBQUERY; 177 boolean complete = sc.isClosed(); 178 IncompleteReason reason = complete 179 ? IncompleteReason.NONE : IncompleteReason.UNCLOSED_PAREN; 180 out.add(new IslandScope(kind, start, end, sc.getEnclosingLevel(), complete, reason)); 181 } 182 } 183 184 /** True if the first solid token before {@code openIndex} is the keyword AS. */ 185 private static boolean isAsPreceded(Pp2TokenStream stream, int openIndex) { 186 for (int j = openIndex - 1; j >= 0; j--) { 187 ETokenType type = stream.get(j).getSourceToken().tokentype; 188 if (isComment(type)) continue; 189 if (type != ETokenType.ttkeyword) return false; 190 String text = stream.get(j).getText(); 191 return text != null && "AS".equalsIgnoreCase(text); 192 } 193 return false; 194 } 195 196 // ---- CASE islands --------------------------------------------------- 197 198 /** Frame kinds on the CASE/BEGIN balancing stack. */ 199 private static final int FRAME_CASE = 0; 200 private static final int FRAME_BEGIN = 1; 201 202 private void addCaseIslands(Pp2TokenStream stream, SqlScopeResult sql, 203 List<IslandScope> out) { 204 int n = stream.size(); 205 // Unified stack of open CASE / BEGIN frames so an END pops the nearest 206 // open construct (a BEGIN's END must not close an enclosing CASE). 207 // Each entry is {frameKind, startIndex}. 208 Deque<int[]> stack = new ArrayDeque<int[]>(); 209 int skipCaseAt = -1; // index of a CASE that is the qualifier of "END CASE" 210 int lastSolid = lastNonTriviaIndex(stream); 211 212 for (int i = 0; i < n; i++) { 213 if (stream.get(i).getSourceToken().tokentype != ETokenType.ttkeyword) continue; 214 String upper = upperKeyword(stream, i); 215 216 if ("CASE".equals(upper)) { 217 if (i == skipCaseAt) { skipCaseAt = -1; continue; } 218 stack.push(new int[]{FRAME_CASE, i}); 219 } else if ("BEGIN".equals(upper)) { 220 stack.push(new int[]{FRAME_BEGIN, i}); 221 } else if ("END".equals(upper)) { 222 int next = nextSolidIndex(stream, i); 223 String nextUpper = next < 0 ? null : upperKeyword(stream, next); 224 // END IF / END LOOP / END WHILE / END REPEAT / END FOR close 225 // inner constructs this recogniser does not track; they pop nothing. 226 if (nextUpper != null && END_INNER_QUALIFIERS.contains(nextUpper)) { 227 continue; 228 } 229 if (stack.isEmpty()) continue; 230 boolean endCase = "CASE".equals(nextUpper); // the "END CASE" terminator 231 int[] frame = stack.pop(); 232 if (frame[0] == FRAME_CASE) { 233 // Include the trailing CASE token of an "END CASE" terminator. 234 int end = endCase ? next : i; 235 out.add(new IslandScope(IslandKind.CASE, frame[1], end, 236 sql.levelAt(frame[1]), true, IncompleteReason.NONE)); 237 } 238 // For a BEGIN frame we emit nothing (S19 owns block scopes); we 239 // only needed it to balance the END correctly. 240 if (endCase) skipCaseAt = next; // don't treat the qualifier as a new CASE 241 } 242 } 243 // Unterminated CASE blocks (any BEGIN frames left are S19's concern). 244 while (!stack.isEmpty()) { 245 int[] frame = stack.pop(); 246 if (frame[0] == FRAME_CASE) { 247 int end = Math.max(frame[1], lastSolid); 248 out.add(new IslandScope(IslandKind.CASE, frame[1], end, 249 sql.levelAt(frame[1]), false, IncompleteReason.MISSING_END)); 250 } 251 } 252 } 253 254 /** Uppercased text of the keyword token at i (assumes it is a ttkeyword). */ 255 private static String upperKeyword(Pp2TokenStream stream, int i) { 256 String text = stream.get(i).getText(); 257 return text == null ? "" : text.toUpperCase(Locale.ROOT); 258 } 259 260 // ---- MERGE islands -------------------------------------------------- 261 262 private void addMergeIslands(Pp2TokenStream stream, SqlScopeResult sql, 263 List<IslandScope> out) { 264 int n = stream.size(); 265 for (int i = 0; i < n; i++) { 266 if (!isKeyword(stream, i, "MERGE")) continue; 267 int end = i; 268 for (int j = i + 1; j < n; j++) { 269 if (stream.get(j).getSourceToken().tokentype == ETokenType.ttsemicolon) break; 270 end = j; 271 } 272 out.add(new IslandScope(IslandKind.MERGE, i, end, sql.levelAt(i), 273 true, IncompleteReason.NONE)); 274 } 275 } 276 277 // ---- helpers -------------------------------------------------------- 278 279 /** 280 * The greatest index {@code e} in {@code [start, end]} such that the summed 281 * source text length of tokens {@code [start, e]} does not exceed 282 * {@code maxChars}. At least {@code start} is always included. 283 */ 284 private static int charCappedEnd(Pp2TokenStream stream, int start, int end, int maxChars) { 285 if (maxChars <= 0) return start; 286 int chars = 0; 287 for (int k = start; k <= end; k++) { 288 String text = stream.get(k).getText(); 289 chars += (text == null ? 0 : text.length()); 290 if (chars > maxChars) return Math.max(start, k - 1); 291 } 292 return end; 293 } 294 295 private static int nextSolidIndex(Pp2TokenStream stream, int from) { 296 for (int j = from + 1; j < stream.size(); j++) { 297 if (!isComment(stream.get(j).getSourceToken().tokentype)) return j; 298 } 299 return -1; 300 } 301 302 private static int lastNonTriviaIndex(Pp2TokenStream stream) { 303 for (int j = stream.size() - 1; j >= 0; j--) { 304 if (!isComment(stream.get(j).getSourceToken().tokentype)) return j; 305 } 306 return Math.max(0, stream.size() - 1); 307 } 308 309 private static boolean isKeyword(Pp2TokenStream stream, int i, String upper) { 310 if (i < 0 || i >= stream.size()) return false; 311 Pp2Token t = stream.get(i); 312 if (t.getSourceToken().tokentype != ETokenType.ttkeyword) return false; 313 String text = t.getText(); 314 return text != null && text.toUpperCase(Locale.ROOT).equals(upper); 315 } 316 317 private static boolean isComment(ETokenType type) { 318 return type == ETokenType.ttsimplecomment 319 || type == ETokenType.ttbracketedcomment 320 || type == ETokenType.ttCPPComment; 321 } 322}