Source code

001package gudusoft.gsqlparser.pp2.render;
002
003import gudusoft.gsqlparser.TSourceToken;
004import gudusoft.gsqlparser.pp.logger.PPLogger;
005import gudusoft.gsqlparser.pp2.Pp2FormatOptions;
006import gudusoft.gsqlparser.pp2.RendererId;
007import gudusoft.gsqlparser.pp2.region.RegionParseOutcome;
008import gudusoft.gsqlparser.pp2.region.StatementRange;
009import gudusoft.gsqlparser.pp2.token.Pp2Token;
010import gudusoft.gsqlparser.pp2.token.Pp2TokenStream;
011import gudusoft.gsqlparser.pp2.token.TokenRole;
012import gudusoft.gsqlparser.pp2.zone.CommentPolicy;
013
014/**
015 * The MVP fallback renderer (plan §7.3/S14): walks the region's slice of a
016 * {@link Pp2TokenStream} and emits each token's text with light spacing.
017 * No clause-level layout, no recognition pipeline — pure token passthrough.
018 *
019 * <h2>Why it exists</h2>
020 *
021 * <p>{@link GuardedAstDelegate} (S13) only fires on regions whose source
022 * actually parses. Everything else — invalid SQL, partial scripts, vendor
023 * fragments the parser doesn't yet support — needs a renderer that
024 * guarantees <b>every solid input token appears in the output, in order</b>
025 * without throwing or losing bytes. Until the lexical island pipeline
026 * (S31) ships, the conservative renderer is the only safety net the engine
027 * has. Even after S31 lands it stays as the last-resort renderer for
028 * cases where the island pipeline itself throws.
029 *
030 * <h2>Output shape</h2>
031 *
032 * <p>Two operating modes, selected by
033 * {@link Pp2FormatOptions#commentPolicy}:
034 *
035 * <ul>
036 *   <li><b>{@link CommentPolicy#PRESERVE} (default):</b> emit each token's
037 *       original preceding whitespace verbatim — the renderer is a
038 *       byte-exact passthrough modulo the inter-region leading trivia,
039 *       which is handed off to the region assembler (S15).</li>
040 *   <li><b>{@link CommentPolicy#REANCHOR} / {@link CommentPolicy#REFLOW}:</b>
041 *       a single space between consecutive tokens, with a {@code "\n"}
042 *       inserted after every {@code ";"}. No other layout choices are
043 *       made — the renderer never reorders, never drops, never merges.</li>
044 * </ul>
045 *
046 * <p>Regardless of policy, tokens carrying the
047 * {@link TokenRole#NO_FORMAT_ZONE} role are emitted with their original
048 * preceding whitespace, so {@code --BEGIN_NO_FORMAT}/{@code --END_NO_FORMAT}
049 * blocks survive byte-exact through any policy.
050 *
051 * <h2>Leading-trivia ownership</h2>
052 *
053 * <p>The renderer never emits leading whitespace for the <i>first</i>
054 * token of the range — that whitespace belongs to the inter-region gap
055 * handled by the assembler (S15). The renderer's output starts with the
056 * first token's text and ends with the last token's text. Trailing
057 * trivia after the last token is similarly the assembler's responsibility.
058 *
059 * <h2>Content-preservation guarantee</h2>
060 *
061 * <p>Every {@link Pp2Token} in the range contributes its text to the
062 * output exactly once, in source order; nothing is dropped, reordered, or
063 * collapsed. This guarantee is verified by
064 * {@link gudusoft.gsqlparser.pp2.token.TokenEquivalence} in
065 * {@code ConservativeTokenRendererTest}.
066 *
067 * <p>Plan reference: §5.2, §7.3/S14, §7.4/S14.
068 */
069public final class ConservativeTokenRenderer implements RegionRenderer {
070
071    private long renderedRegionCount;
072    private long emittedTokenCount;
073
074    /** Number of regions {@code render(...)} has produced output for. */
075    public long getRenderedRegionCount() { return renderedRegionCount; }
076
077    /** Number of tokens emitted across all rendered regions. */
078    public long getEmittedTokenCount() { return emittedTokenCount; }
079
080    @Override
081    public RendererId id() { return RendererId.CONSERVATIVE; }
082
083    @Override
084    public String render(RegionParseOutcome outcome,
085                         Pp2TokenStream stream,
086                         Pp2FormatOptions opts) {
087        if (outcome == null) throw new NullPointerException("outcome");
088        if (stream == null) throw new NullPointerException("stream");
089        if (opts == null) throw new NullPointerException("opts");
090
091        StatementRange range = outcome.getRange();
092        int start = range.getStartTokenIndex();
093        int end = Math.min(range.getEndTokenIndex(), stream.size());
094        if (start >= end) {
095            // Empty range — a valid rendering for an empty region. Returning
096            // "" here matches the RegionRenderer contract ("" is not the
097            // fall-through sentinel).
098            renderedRegionCount++;
099            return "";
100        }
101
102        boolean preserve = opts.commentPolicy == CommentPolicy.PRESERVE;
103        StringBuilder out = new StringBuilder();
104        boolean prevWasSemicolon = false;
105        boolean prevWasLineComment = false;
106        String parsedSql = outcome.getParsedSql();
107        int parsedSqlBase = range.getStartOffset();
108
109        for (int i = start; i < end; ) {
110            Pp2Token tok;
111            try {
112                tok = stream.get(i);
113            } catch (Throwable t) {
114                // Defensive: the stream should never throw on a valid index
115                // but the renderer's no-throw contract is absolute.
116                PPLogger.error(t);
117                PPLogger.info("ConservativeTokenRenderer: stream.get(" + i
118                    + ") threw; truncating output. range=" + range);
119                break;
120            }
121            if (tok == null) { i++; continue; }
122
123            if (tok.hasRole(TokenRole.NO_FORMAT_ZONE)) {
124                // Emit the entire contiguous NO_FORMAT_ZONE span byte-exact
125                // by slicing the source bytes from parsedSql. This recovers
126                // the original linebreak-vs-blank ordering that the token
127                // stream's precedingLinebreaks/precedingBlanks counts lose.
128                int spanEnd = i;
129                while (spanEnd < end
130                    && stream.get(spanEnd).hasRole(TokenRole.NO_FORMAT_ZONE)) {
131                    spanEnd++;
132                }
133                // Route the inter-token separator through the standard
134                // path so PRESERVE preserves indentation, prevWasLineComment
135                // forces a newline (with preserved indent in PRESERVE
136                // mode), and prevWasSemicolon / fall-through still apply.
137                // Codex round-2 P2 #1: the previous version called
138                // ensureTrailingNewline() which dropped PRESERVE indent in
139                // the "-- c\n  --BEGIN_NO_FORMAT" case.
140                if (i > start) {
141                    appendSeparator(out, stream.get(i), preserve,
142                        prevWasSemicolon, prevWasLineComment);
143                }
144                String spanText = sliceFrozenZone(parsedSql, parsedSqlBase,
145                    stream, i, spanEnd - 1);
146                if (spanText == null) {
147                    // Source slicing failed (e.g., outcome.getParsedSql() did
148                    // not cover the absolute offsets, or text did not
149                    // round-trip). Fall back to per-token emission so
150                    // content is still preserved.
151                    for (int j = i; j < spanEnd; j++) {
152                        Pp2Token zoneTok = stream.get(j);
153                        if (j > i) {
154                            appendOriginalWhitespace(out, zoneTok);
155                        }
156                        String t2 = zoneTok.getText();
157                        if (t2 != null) out.append(t2);
158                        emittedTokenCount++;
159                    }
160                } else {
161                    out.append(spanText);
162                    emittedTokenCount += (spanEnd - i);
163                }
164                Pp2Token last = stream.get(spanEnd - 1);
165                String lastText = last.getText();
166                prevWasSemicolon = lastText != null && ";".equals(lastText);
167                // --END_NO_FORMAT is itself a line comment; force the next
168                // token to start on a new line so a non-PRESERVE renderer
169                // does not splice "--END_NO_FORMAT SELECT 2" into one line.
170                prevWasLineComment = isLineComment(last);
171                i = spanEnd;
172                continue;
173            }
174
175            String text = tok.getText();
176            if (text == null) text = "";
177
178            if (i > start) {
179                appendSeparator(out, tok, preserve, prevWasSemicolon,
180                    prevWasLineComment);
181            }
182            out.append(text);
183
184            prevWasSemicolon = ";".equals(text);
185            prevWasLineComment = isLineComment(tok);
186            emittedTokenCount++;
187            i++;
188        }
189
190        renderedRegionCount++;
191        return out.toString();
192    }
193
194    private static boolean isLineComment(Pp2Token tok) {
195        if (tok == null) return false;
196        return tok.hasRole(TokenRole.COMMENT_LINE);
197    }
198
199    /**
200     * Slice the byte-exact source text for the NO_FORMAT_ZONE span covering
201     * stream tokens {@code [first, last]} (inclusive). Returns {@code null}
202     * if any input is unusable, OR if the slice does not round-trip to the
203     * expected first/last token texts.
204     *
205     * <p>{@code parsedSql} is the engine-supplied region source slice. The
206     * absolute offsets carried by {@link TSourceToken} are translated into
207     * relative positions by subtracting {@code base = range.startOffset}.
208     *
209     * <p>The text-match validation catches the case where
210     * {@code parsedSql} is not the contiguous source bytes for the range
211     * (e.g., the engine stripped a trailing {@code GO} for MSSQL, or any
212     * future normalization shifted offsets). When the slice's leading
213     * characters don't match the first token's text — or trailing don't
214     * match the last token's text — we treat the source as untrusted and
215     * fall back to per-token emission so content is still preserved.
216     */
217    private static String sliceFrozenZone(String parsedSql, int base,
218                                          Pp2TokenStream stream,
219                                          int first, int last) {
220        if (parsedSql == null) return null;
221        Pp2Token f = stream.get(first);
222        Pp2Token l = stream.get(last);
223        if (f == null || l == null) return null;
224        TSourceToken fSt = f.getSourceToken();
225        TSourceToken lSt = l.getSourceToken();
226        if (fSt == null || lSt == null) return null;
227        String lText = l.getText();
228        if (lText == null) lText = "";
229        int relStart = (int) fSt.offset - base;
230        int relEnd = (int) lSt.offset + lText.length() - base;
231        if (relStart < 0 || relEnd > parsedSql.length() || relStart > relEnd) {
232            return null;
233        }
234        // Defensive: verify every token in the span round-trips to the
235        // expected source bytes at its relative offset. A non-null but
236        // wrong slice can happen if parsedSql is not the contiguous range
237        // source (e.g., GO-stripped). Endpoint-only validation could be
238        // fooled by a coincidental match at the boundaries; checking
239        // every token catches a middle-section shift. (Codex R2 P2 #2.)
240        for (int k = first; k <= last; k++) {
241            Pp2Token t = stream.get(k);
242            if (t == null) return null;
243            TSourceToken st = t.getSourceToken();
244            if (st == null) return null;
245            String text = t.getText();
246            if (text == null) text = "";
247            int rel = (int) st.offset - base;
248            if (rel < relStart || rel + text.length() > relEnd) return null;
249            if (!parsedSql.regionMatches(rel, text, 0, text.length())) {
250                return null;
251            }
252        }
253        return parsedSql.substring(relStart, relEnd);
254    }
255
256    /**
257     * Append the inter-token whitespace separator preceding {@code tok}.
258     * Precedence:
259     *
260     * <ol>
261     *   <li>Previous token was a line comment ({@code --} or {@code #})
262     *       → emit {@code "\n"}. Without this, a non-PRESERVE renderer
263     *       would splice {@code "-- comment SELECT 2"} into one line and
264     *       comment out the next token. The check fires BEFORE the
265     *       policy branches so it applies under both PRESERVE and
266     *       non-PRESERVE: if PRESERVE's source whitespace already has a
267     *       linebreak, we use that; otherwise we force one in.</li>
268     *   <li>{@code PRESERVE} policy → emit original
269     *       {@code precedingLinebreaks} + {@code precedingBlanks}. Two
270     *       source tokens that were directly adjacent stay adjacent.</li>
271     *   <li>Previous solid token was {@code ;} → emit {@code "\n"}. The
272     *       "linebreak after ;" rule (plan §7.3/S14). PRESERVE mode skips
273     *       this branch — the source whitespace is the source of truth.</li>
274     *   <li>Otherwise → emit a single space.</li>
275     * </ol>
276     *
277     * <p>{@code NO_FORMAT_ZONE} tokens are not routed here — the caller
278     * handles the contiguous zone by source-slicing for byte-exact
279     * fidelity.
280     */
281    private static void appendSeparator(StringBuilder out, Pp2Token tok,
282                                        boolean preserve,
283                                        boolean prevWasSemicolon,
284                                        boolean prevWasLineComment) {
285        if (prevWasLineComment) {
286            if (preserve) {
287                // PRESERVE: emit original whitespace, but guarantee at
288                // least one newline so the line comment terminates.
289                int linebreaks = tok.getPrecedingLinebreaks();
290                if (linebreaks <= 0) {
291                    out.append('\n');
292                } else {
293                    for (int k = 0; k < linebreaks; k++) out.append('\n');
294                }
295                int blanks = tok.getPrecedingBlanks();
296                for (int k = 0; k < blanks; k++) out.append(' ');
297            } else {
298                out.append('\n');
299            }
300            return;
301        }
302        if (preserve) {
303            appendOriginalWhitespace(out, tok);
304            return;
305        }
306        if (prevWasSemicolon) {
307            out.append('\n');
308            return;
309        }
310        out.append(' ');
311    }
312
313    private static void appendOriginalWhitespace(StringBuilder out, Pp2Token tok) {
314        int linebreaks = tok.getPrecedingLinebreaks();
315        for (int k = 0; k < linebreaks; k++) out.append('\n');
316        int blanks = tok.getPrecedingBlanks();
317        for (int k = 0; k < blanks; k++) out.append(' ');
318    }
319}