001package gudusoft.gsqlparser.pp2.island; 002 003import gudusoft.gsqlparser.ETokenType; 004import gudusoft.gsqlparser.TSourceToken; 005import gudusoft.gsqlparser.pp2.token.Pp2Token; 006import gudusoft.gsqlparser.pp2.token.Pp2TokenStream; 007 008import java.util.ArrayDeque; 009import java.util.ArrayList; 010import java.util.Arrays; 011import java.util.Collections; 012import java.util.Deque; 013import java.util.HashSet; 014import java.util.List; 015import java.util.Locale; 016import java.util.Set; 017 018/** 019 * Computes, for every token in a {@link Pp2TokenStream}, the lexical block 020 * nesting depth and the kind of block it sits directly inside 021 * (parenthesised group or {@code BEGIN ... END} block). 022 * 023 * <h2>Iterative, never recursive</h2> 024 * 025 * <p>Traversal uses an explicit {@link ArrayDeque} of open frames — never 026 * recursion — so a 2000-deep nested input cannot {@code StackOverflowError} 027 * (CLAUDE.md "CRITICAL: Iterative Traversal"; plan §10.1, §13/R3). The whole 028 * pass is a single left-to-right walk: O(n) time, O(maxDepth) stack space. 029 * 030 * <h2>Read-only</h2> 031 * 032 * <p>The detector does not mutate any {@link Pp2Token}, its roles, or the 033 * wrapped {@link TSourceToken}. It returns a {@link BlockScopeResult} side 034 * structure indexed by token position (consistent with S18's match list). 035 * S19 is not one of the designated role-annotator stages (S9/S21/S33), so it 036 * deliberately keeps its output off the token. 037 * 038 * <h2>Depth convention</h2> 039 * 040 * <p>An opener ({@code (} or {@code BEGIN}) and its matching closer 041 * ({@code )} or {@code END}) both report the <i>enclosing</i> depth; tokens 042 * strictly between them report enclosing depth + 1. A token's 043 * {@link BlockKind} is the kind of the frame it belongs to at that depth 044 * ({@link BlockKind#NONE} at depth 0). 045 * 046 * <h2>BEGIN/END handling</h2> 047 * 048 * <p>{@code BEGIN} pushes a {@link BlockKind#BEGIN_END} frame. An <i>unqualified</i> 049 * {@code END} (not immediately followed by {@code IF/LOOP/WHILE/REPEAT/FOR/CASE}) 050 * pops the top {@code BEGIN_END} frame. Qualified ends ({@code END IF}, etc.) 051 * close inner constructs that this detector does not track, so they do not pop 052 * the block frame — mirroring {@code StatementBoundaryDetector}. {@code CASE} 053 * blocks are out of S19 scope (handled by the S27 layout rules); mixing a bare 054 * {@code CASE ... END} inside a {@code BEGIN} block is a known limitation. 055 * 056 * <p>Plan reference: §7.3/S19, §7.4/S19, §10.1, §13/R3. 057 */ 058public final class BlockScopeDetector { 059 060 /** Keywords that, immediately after {@code END}, mark a qualified end. */ 061 private static final Set<String> END_QUALIFIERS; 062 static { 063 Set<String> q = new HashSet<String>(Arrays.asList( 064 "IF", "LOOP", "WHILE", "REPEAT", "FOR", "CASE")); 065 END_QUALIFIERS = Collections.unmodifiableSet(q); 066 } 067 068 /** 069 * Annotate block depth + kind for every token in {@code stream}. 070 * 071 * @param stream the token stream; must not be null 072 * @return a {@link BlockScopeResult} indexed by token position; never null 073 * @throws NullPointerException if {@code stream} is null 074 */ 075 public BlockScopeResult detect(Pp2TokenStream stream) { 076 if (stream == null) throw new NullPointerException("stream"); 077 078 int n = stream.size(); 079 int[] depth = new int[n]; 080 BlockKind[] kind = new BlockKind[n]; 081 082 Deque<Frame> stack = new ArrayDeque<Frame>(); 083 List<BlockScope> scopes = new ArrayList<BlockScope>(); 084 int maxDepth = 0; 085 086 for (int i = 0; i < n; i++) { 087 Pp2Token t = stream.get(i); 088 TSourceToken st = t.getSourceToken(); 089 ETokenType type = st.tokentype; 090 String text = t.getText(); 091 092 if (type == ETokenType.ttleftparenthesis) { 093 // Opener belongs to the enclosing scope, then we descend. 094 record(depth, kind, i, stack); 095 stack.push(new Frame(BlockKind.PAREN, i)); 096 } else if (type == ETokenType.ttrightparenthesis) { 097 // Closer belongs to the enclosing scope: pop first (if matching). 098 if (!stack.isEmpty() && stack.peek().kind == BlockKind.PAREN) { 099 Frame f = stack.pop(); 100 scopes.add(new BlockScope(f.kind, f.openIndex, i, stack.size())); 101 } 102 record(depth, kind, i, stack); 103 } else if (isKeyword(type, text, "BEGIN")) { 104 record(depth, kind, i, stack); 105 stack.push(new Frame(BlockKind.BEGIN_END, i)); 106 } else if (isKeyword(type, text, "END") && !isQualifiedEnd(stream, i)) { 107 if (!stack.isEmpty() && stack.peek().kind == BlockKind.BEGIN_END) { 108 Frame f = stack.pop(); 109 scopes.add(new BlockScope(f.kind, f.openIndex, i, stack.size())); 110 } 111 record(depth, kind, i, stack); 112 } else { 113 record(depth, kind, i, stack); 114 } 115 116 if (depth[i] > maxDepth) maxDepth = depth[i]; 117 } 118 119 // Any frames still open at EOF are unclosed (incomplete) blocks. 120 int unclosed = stack.size(); 121 while (!stack.isEmpty()) { 122 Frame f = stack.pop(); 123 scopes.add(new BlockScope(f.kind, f.openIndex, -1, stack.size())); 124 } 125 126 return new BlockScopeResult(depth, kind, maxDepth, unclosed, 127 Collections.unmodifiableList(scopes)); 128 } 129 130 /** Record token i's depth+kind from the current stack state (after open/close). */ 131 private static void record(int[] depth, BlockKind[] kind, int i, Deque<Frame> stack) { 132 depth[i] = stack.size(); 133 kind[i] = stack.isEmpty() ? BlockKind.NONE : stack.peek().kind; 134 } 135 136 private static boolean isKeyword(ETokenType type, String text, String upper) { 137 return type == ETokenType.ttkeyword 138 && text != null 139 && text.toUpperCase(Locale.ROOT).equals(upper); 140 } 141 142 /** 143 * True if the END at index {@code i} is followed by a qualifier keyword, 144 * skipping any intervening comment tokens (e.g. {@code END /*c*/ IF}). 145 * Comments are first-class tokens in the stream, so a naive 146 * {@code i + 1} check would misclassify {@code END <comment> IF} as 147 * unqualified and pop the block prematurely. 148 */ 149 private static boolean isQualifiedEnd(Pp2TokenStream stream, int i) { 150 int j = nextSolidIndex(stream, i); 151 if (j < 0) return false; 152 Pp2Token next = stream.get(j); 153 if (next.getSourceToken().tokentype != ETokenType.ttkeyword) return false; 154 String nextText = next.getText(); 155 if (nextText == null) return false; 156 return END_QUALIFIERS.contains(nextText.toUpperCase(Locale.ROOT)); 157 } 158 159 /** Index of the first non-comment token after {@code from}, or -1 if none. */ 160 private static int nextSolidIndex(Pp2TokenStream stream, int from) { 161 for (int j = from + 1; j < stream.size(); j++) { 162 if (!isComment(stream.get(j).getSourceToken().tokentype)) return j; 163 } 164 return -1; 165 } 166 167 private static boolean isComment(ETokenType type) { 168 return type == ETokenType.ttsimplecomment 169 || type == ETokenType.ttbracketedcomment 170 || type == ETokenType.ttCPPComment; 171 } 172 173 /** A single open block frame, kept on the explicit traversal stack. */ 174 private static final class Frame { 175 final BlockKind kind; 176 final int openIndex; 177 Frame(BlockKind kind, int openIndex) { 178 this.kind = kind; 179 this.openIndex = openIndex; 180 } 181 } 182 183 /** 184 * Per-token block depth + kind, plus the list of detected scopes. Indexed 185 * by the same token positions as the source {@link Pp2TokenStream}. 186 */ 187 public static final class BlockScopeResult { 188 private final int[] depth; 189 private final BlockKind[] kind; 190 private final int maxDepth; 191 private final int unclosedCount; 192 private final List<BlockScope> scopes; 193 194 BlockScopeResult(int[] depth, BlockKind[] kind, int maxDepth, 195 int unclosedCount, List<BlockScope> scopes) { 196 this.depth = depth; 197 this.kind = kind; 198 this.maxDepth = maxDepth; 199 this.unclosedCount = unclosedCount; 200 this.scopes = scopes; 201 } 202 203 /** Number of tokens covered. */ 204 public int size() { return depth.length; } 205 206 /** Block nesting depth at token {@code index} (0 = top level). */ 207 public int depthAt(int index) { return depth[index]; } 208 209 /** Kind of block the token at {@code index} sits directly inside. */ 210 public BlockKind kindAt(int index) { return kind[index]; } 211 212 /** Maximum nesting depth observed across the stream. */ 213 public int getMaxDepth() { return maxDepth; } 214 215 /** Number of frames still open at end of input (unbalanced openers). */ 216 public int getUnclosedCount() { return unclosedCount; } 217 218 /** 219 * All detected scopes, in close order (then unclosed frames). Each 220 * carries its opener index, closer index ({@code -1} if never closed), 221 * kind, and the enclosing depth at which it sits. 222 */ 223 public List<BlockScope> getScopes() { return scopes; } 224 } 225 226 /** An immutable detected block span. */ 227 public static final class BlockScope { 228 private final BlockKind kind; 229 private final int openIndex; 230 private final int closeIndex; // -1 if unclosed 231 private final int enclosingDepth; 232 233 BlockScope(BlockKind kind, int openIndex, int closeIndex, int enclosingDepth) { 234 this.kind = kind; 235 this.openIndex = openIndex; 236 this.closeIndex = closeIndex; 237 this.enclosingDepth = enclosingDepth; 238 } 239 240 public BlockKind getKind() { return kind; } 241 public int getOpenIndex() { return openIndex; } 242 public int getCloseIndex() { return closeIndex; } 243 public boolean isClosed() { return closeIndex >= 0; } 244 public int getEnclosingDepth() { return enclosingDepth; } 245 246 @Override 247 public String toString() { 248 return "BlockScope[" + kind + " open=" + openIndex 249 + " close=" + closeIndex + " depth=" + enclosingDepth + "]"; 250 } 251 } 252}