001package gudusoft.gsqlparser.pp2.render; 002 003import gudusoft.gsqlparser.TSourceToken; 004import gudusoft.gsqlparser.pp.logger.PPLogger; 005import gudusoft.gsqlparser.pp2.Pp2FormatOptions; 006import gudusoft.gsqlparser.pp2.RendererId; 007import gudusoft.gsqlparser.pp2.region.RegionParseOutcome; 008import gudusoft.gsqlparser.pp2.region.StatementRange; 009import gudusoft.gsqlparser.pp2.token.Pp2Token; 010import gudusoft.gsqlparser.pp2.token.Pp2TokenStream; 011import gudusoft.gsqlparser.pp2.token.TokenRole; 012import gudusoft.gsqlparser.pp2.zone.CommentPolicy; 013 014/** 015 * The MVP fallback renderer (plan §7.3/S14): walks the region's slice of a 016 * {@link Pp2TokenStream} and emits each token's text with light spacing. 017 * No clause-level layout, no recognition pipeline — pure token passthrough. 018 * 019 * <h2>Why it exists</h2> 020 * 021 * <p>{@link GuardedAstDelegate} (S13) only fires on regions whose source 022 * actually parses. Everything else — invalid SQL, partial scripts, vendor 023 * fragments the parser doesn't yet support — needs a renderer that 024 * guarantees <b>every solid input token appears in the output, in order</b> 025 * without throwing or losing bytes. Until the lexical island pipeline 026 * (S31) ships, the conservative renderer is the only safety net the engine 027 * has. Even after S31 lands it stays as the last-resort renderer for 028 * cases where the island pipeline itself throws. 029 * 030 * <h2>Output shape</h2> 031 * 032 * <p>Two operating modes, selected by 033 * {@link Pp2FormatOptions#commentPolicy}: 034 * 035 * <ul> 036 * <li><b>{@link CommentPolicy#PRESERVE} (default):</b> emit each token's 037 * original preceding whitespace verbatim — the renderer is a 038 * byte-exact passthrough modulo the inter-region leading trivia, 039 * which is handed off to the region assembler (S15).</li> 040 * <li><b>{@link CommentPolicy#REANCHOR} / {@link CommentPolicy#REFLOW}:</b> 041 * a single space between consecutive tokens, with a {@code "\n"} 042 * inserted after every {@code ";"}. No other layout choices are 043 * made — the renderer never reorders, never drops, never merges.</li> 044 * </ul> 045 * 046 * <p>Regardless of policy, tokens carrying the 047 * {@link TokenRole#NO_FORMAT_ZONE} role are emitted with their original 048 * preceding whitespace, so {@code --BEGIN_NO_FORMAT}/{@code --END_NO_FORMAT} 049 * blocks survive byte-exact through any policy. 050 * 051 * <h2>Leading-trivia ownership</h2> 052 * 053 * <p>The renderer never emits leading whitespace for the <i>first</i> 054 * token of the range — that whitespace belongs to the inter-region gap 055 * handled by the assembler (S15). The renderer's output starts with the 056 * first token's text and ends with the last token's text. Trailing 057 * trivia after the last token is similarly the assembler's responsibility. 058 * 059 * <h2>Content-preservation guarantee</h2> 060 * 061 * <p>Every {@link Pp2Token} in the range contributes its text to the 062 * output exactly once, in source order; nothing is dropped, reordered, or 063 * collapsed. This guarantee is verified by 064 * {@link gudusoft.gsqlparser.pp2.token.TokenEquivalence} in 065 * {@code ConservativeTokenRendererTest}. 066 * 067 * <p>Plan reference: §5.2, §7.3/S14, §7.4/S14. 068 */ 069public final class ConservativeTokenRenderer implements RegionRenderer { 070 071 private long renderedRegionCount; 072 private long emittedTokenCount; 073 074 /** Number of regions {@code render(...)} has produced output for. */ 075 public long getRenderedRegionCount() { return renderedRegionCount; } 076 077 /** Number of tokens emitted across all rendered regions. */ 078 public long getEmittedTokenCount() { return emittedTokenCount; } 079 080 @Override 081 public RendererId id() { return RendererId.CONSERVATIVE; } 082 083 @Override 084 public String render(RegionParseOutcome outcome, 085 Pp2TokenStream stream, 086 Pp2FormatOptions opts) { 087 if (outcome == null) throw new NullPointerException("outcome"); 088 if (stream == null) throw new NullPointerException("stream"); 089 if (opts == null) throw new NullPointerException("opts"); 090 091 StatementRange range = outcome.getRange(); 092 int start = range.getStartTokenIndex(); 093 int end = Math.min(range.getEndTokenIndex(), stream.size()); 094 if (start >= end) { 095 // Empty range — a valid rendering for an empty region. Returning 096 // "" here matches the RegionRenderer contract ("" is not the 097 // fall-through sentinel). 098 renderedRegionCount++; 099 return ""; 100 } 101 102 boolean preserve = opts.commentPolicy == CommentPolicy.PRESERVE; 103 StringBuilder out = new StringBuilder(); 104 boolean prevWasSemicolon = false; 105 boolean prevWasLineComment = false; 106 String parsedSql = outcome.getParsedSql(); 107 int parsedSqlBase = range.getStartOffset(); 108 109 for (int i = start; i < end; ) { 110 Pp2Token tok; 111 try { 112 tok = stream.get(i); 113 } catch (Throwable t) { 114 // Defensive: the stream should never throw on a valid index 115 // but the renderer's no-throw contract is absolute. 116 PPLogger.error(t); 117 PPLogger.info("ConservativeTokenRenderer: stream.get(" + i 118 + ") threw; truncating output. range=" + range); 119 break; 120 } 121 if (tok == null) { i++; continue; } 122 123 if (tok.hasRole(TokenRole.NO_FORMAT_ZONE)) { 124 // Emit the entire contiguous NO_FORMAT_ZONE span byte-exact 125 // by slicing the source bytes from parsedSql. This recovers 126 // the original linebreak-vs-blank ordering that the token 127 // stream's precedingLinebreaks/precedingBlanks counts lose. 128 int spanEnd = i; 129 while (spanEnd < end 130 && stream.get(spanEnd).hasRole(TokenRole.NO_FORMAT_ZONE)) { 131 spanEnd++; 132 } 133 // Route the inter-token separator through the standard 134 // path so PRESERVE preserves indentation, prevWasLineComment 135 // forces a newline (with preserved indent in PRESERVE 136 // mode), and prevWasSemicolon / fall-through still apply. 137 // Codex round-2 P2 #1: the previous version called 138 // ensureTrailingNewline() which dropped PRESERVE indent in 139 // the "-- c\n --BEGIN_NO_FORMAT" case. 140 if (i > start) { 141 appendSeparator(out, stream.get(i), preserve, 142 prevWasSemicolon, prevWasLineComment); 143 } 144 String spanText = sliceFrozenZone(parsedSql, parsedSqlBase, 145 stream, i, spanEnd - 1); 146 if (spanText == null) { 147 // Source slicing failed (e.g., outcome.getParsedSql() did 148 // not cover the absolute offsets, or text did not 149 // round-trip). Fall back to per-token emission so 150 // content is still preserved. 151 for (int j = i; j < spanEnd; j++) { 152 Pp2Token zoneTok = stream.get(j); 153 if (j > i) { 154 appendOriginalWhitespace(out, zoneTok); 155 } 156 String t2 = zoneTok.getText(); 157 if (t2 != null) out.append(t2); 158 emittedTokenCount++; 159 } 160 } else { 161 out.append(spanText); 162 emittedTokenCount += (spanEnd - i); 163 } 164 Pp2Token last = stream.get(spanEnd - 1); 165 String lastText = last.getText(); 166 prevWasSemicolon = lastText != null && ";".equals(lastText); 167 // --END_NO_FORMAT is itself a line comment; force the next 168 // token to start on a new line so a non-PRESERVE renderer 169 // does not splice "--END_NO_FORMAT SELECT 2" into one line. 170 prevWasLineComment = isLineComment(last); 171 i = spanEnd; 172 continue; 173 } 174 175 String text = tok.getText(); 176 if (text == null) text = ""; 177 178 if (i > start) { 179 appendSeparator(out, tok, preserve, prevWasSemicolon, 180 prevWasLineComment); 181 } 182 out.append(text); 183 184 prevWasSemicolon = ";".equals(text); 185 prevWasLineComment = isLineComment(tok); 186 emittedTokenCount++; 187 i++; 188 } 189 190 renderedRegionCount++; 191 return out.toString(); 192 } 193 194 private static boolean isLineComment(Pp2Token tok) { 195 if (tok == null) return false; 196 return tok.hasRole(TokenRole.COMMENT_LINE); 197 } 198 199 /** 200 * Slice the byte-exact source text for the NO_FORMAT_ZONE span covering 201 * stream tokens {@code [first, last]} (inclusive). Returns {@code null} 202 * if any input is unusable, OR if the slice does not round-trip to the 203 * expected first/last token texts. 204 * 205 * <p>{@code parsedSql} is the engine-supplied region source slice. The 206 * absolute offsets carried by {@link TSourceToken} are translated into 207 * relative positions by subtracting {@code base = range.startOffset}. 208 * 209 * <p>The text-match validation catches the case where 210 * {@code parsedSql} is not the contiguous source bytes for the range 211 * (e.g., the engine stripped a trailing {@code GO} for MSSQL, or any 212 * future normalization shifted offsets). When the slice's leading 213 * characters don't match the first token's text — or trailing don't 214 * match the last token's text — we treat the source as untrusted and 215 * fall back to per-token emission so content is still preserved. 216 */ 217 private static String sliceFrozenZone(String parsedSql, int base, 218 Pp2TokenStream stream, 219 int first, int last) { 220 if (parsedSql == null) return null; 221 Pp2Token f = stream.get(first); 222 Pp2Token l = stream.get(last); 223 if (f == null || l == null) return null; 224 TSourceToken fSt = f.getSourceToken(); 225 TSourceToken lSt = l.getSourceToken(); 226 if (fSt == null || lSt == null) return null; 227 String lText = l.getText(); 228 if (lText == null) lText = ""; 229 int relStart = (int) fSt.offset - base; 230 int relEnd = (int) lSt.offset + lText.length() - base; 231 if (relStart < 0 || relEnd > parsedSql.length() || relStart > relEnd) { 232 return null; 233 } 234 // Defensive: verify every token in the span round-trips to the 235 // expected source bytes at its relative offset. A non-null but 236 // wrong slice can happen if parsedSql is not the contiguous range 237 // source (e.g., GO-stripped). Endpoint-only validation could be 238 // fooled by a coincidental match at the boundaries; checking 239 // every token catches a middle-section shift. (Codex R2 P2 #2.) 240 for (int k = first; k <= last; k++) { 241 Pp2Token t = stream.get(k); 242 if (t == null) return null; 243 TSourceToken st = t.getSourceToken(); 244 if (st == null) return null; 245 String text = t.getText(); 246 if (text == null) text = ""; 247 int rel = (int) st.offset - base; 248 if (rel < relStart || rel + text.length() > relEnd) return null; 249 if (!parsedSql.regionMatches(rel, text, 0, text.length())) { 250 return null; 251 } 252 } 253 return parsedSql.substring(relStart, relEnd); 254 } 255 256 /** 257 * Append the inter-token whitespace separator preceding {@code tok}. 258 * Precedence: 259 * 260 * <ol> 261 * <li>Previous token was a line comment ({@code --} or {@code #}) 262 * → emit {@code "\n"}. Without this, a non-PRESERVE renderer 263 * would splice {@code "-- comment SELECT 2"} into one line and 264 * comment out the next token. The check fires BEFORE the 265 * policy branches so it applies under both PRESERVE and 266 * non-PRESERVE: if PRESERVE's source whitespace already has a 267 * linebreak, we use that; otherwise we force one in.</li> 268 * <li>{@code PRESERVE} policy → emit original 269 * {@code precedingLinebreaks} + {@code precedingBlanks}. Two 270 * source tokens that were directly adjacent stay adjacent.</li> 271 * <li>Previous solid token was {@code ;} → emit {@code "\n"}. The 272 * "linebreak after ;" rule (plan §7.3/S14). PRESERVE mode skips 273 * this branch — the source whitespace is the source of truth.</li> 274 * <li>Otherwise → emit a single space.</li> 275 * </ol> 276 * 277 * <p>{@code NO_FORMAT_ZONE} tokens are not routed here — the caller 278 * handles the contiguous zone by source-slicing for byte-exact 279 * fidelity. 280 */ 281 private static void appendSeparator(StringBuilder out, Pp2Token tok, 282 boolean preserve, 283 boolean prevWasSemicolon, 284 boolean prevWasLineComment) { 285 if (prevWasLineComment) { 286 if (preserve) { 287 // PRESERVE: emit original whitespace, but guarantee at 288 // least one newline so the line comment terminates. 289 int linebreaks = tok.getPrecedingLinebreaks(); 290 if (linebreaks <= 0) { 291 out.append('\n'); 292 } else { 293 for (int k = 0; k < linebreaks; k++) out.append('\n'); 294 } 295 int blanks = tok.getPrecedingBlanks(); 296 for (int k = 0; k < blanks; k++) out.append(' '); 297 } else { 298 out.append('\n'); 299 } 300 return; 301 } 302 if (preserve) { 303 appendOriginalWhitespace(out, tok); 304 return; 305 } 306 if (prevWasSemicolon) { 307 out.append('\n'); 308 return; 309 } 310 out.append(' '); 311 } 312 313 private static void appendOriginalWhitespace(StringBuilder out, Pp2Token tok) { 314 int linebreaks = tok.getPrecedingLinebreaks(); 315 for (int k = 0; k < linebreaks; k++) out.append('\n'); 316 int blanks = tok.getPrecedingBlanks(); 317 for (int k = 0; k < blanks; k++) out.append(' '); 318 } 319}