package gudusoft.gsqlparser.pp2.render;

import gudusoft.gsqlparser.pp.logger.PPLogger;
import gudusoft.gsqlparser.pp2.Pp2FormatOptions;
import gudusoft.gsqlparser.pp2.token.SourceSpanLedger;
import gudusoft.gsqlparser.pp2.zone.CommentPolicy;

import java.util.List;

/**
 * Assembles per-region rendered texts into a single output string, bridging
 * gaps between regions with the original inter-region trivia extracted from
 * the {@link SourceSpanLedger}.
 *
 * <h2>Why the ledger owns inter-region trivia</h2>
 *
 * <p>Every {@link RegionRenderer} intentionally omits the leading whitespace
 * for the <em>first</em> token of its range (see {@link RenderedRegion}).
 * The bytes between adjacent regions — whitespace, comments, blank lines,
 * {@code --BEGIN_NO_FORMAT}/{@code --END_NO_FORMAT} blocks — live in the
 * original source at positions {@code [r_i.endOffset, r_{i+1}.startOffset)}.
 * The assembler is the counterparty that places those bytes into the output,
 * so they reach the caller byte-for-byte regardless of which renderer handled
 * the surrounding statements.
 *
 * <h2>Assembly model</h2>
 *
 * <p>Given an ordered, non-overlapping list of {@link RenderedRegion}s:
 * <ol>
 *   <li><b>Preamble</b> — {@code source[0, first.startOffset)}</li>
 *   <li>For each region {@code r_i}:
 *     <ol>
 *       <li>Emit {@code r_i.getText()} (the renderer's output)</li>
 *       <li>Emit {@code source[r_i.endOffset, r_{i+1}.startOffset)}
 *           (inter-region gap; the final region skips this step)</li>
 *     </ol>
 *   </li>
 *   <li><b>Trailer</b> — {@code source[last.endOffset, source.length())}</li>
 * </ol>
 *
 * <p>An empty regions list returns the entire source string unchanged — all
 * bytes are treated as inter-region trivia.
 *
 * <h2>Comment policy</h2>
 *
 * <p>Phase-2 MVP implements only {@link CommentPolicy#PRESERVE}: inter-region
 * bytes are always emitted verbatim. {@link CommentPolicy#REANCHOR} and
 * {@link CommentPolicy#REFLOW} log a notice and fall back to PRESERVE; the
 * Phase-3 island pipeline will provide real semantics.
 *
 * <h2>Defensive behaviour</h2>
 *
 * <p>Overlapping or out-of-order adjacent regions (region {@code i} ends after
 * region {@code i+1} starts; this should never occur when
 * {@code StatementBoundaryDetector} S11 produces the list) are detected and
 * logged; the gap between them is skipped rather than emitted twice.
 * A source-slice request whose bounds fall outside the source string is
 * logged and skipped <em>in its entirety</em> instead of throwing, so an
 * offset arithmetic bug in the caller cannot abort the assembly. Note that
 * any in-bounds portion of such a slice is not emitted either.
 *
 * <p>Plan reference: §7.3/S15, §7.4/S15, §10.4.
 */
public final class RegionAssembler {

    /**
     * Assemble rendered regions into a final output string.
     *
     * @param regions ordered (source position), non-overlapping list of
     *                rendered regions; empty list is allowed
     * @param ledger  the source-span ledger for the original SQL;
     *                {@link SourceSpanLedger#getSource()} is the byte authority
     * @param opts    pp2 format options;
     *                {@link Pp2FormatOptions#commentPolicy} governs how
     *                inter-region trivia is emitted
     * @return the assembled output string; for an empty {@code regions} list
     *         this is the ledger's source string itself
     * @throws NullPointerException if any argument is null
     */
    public String assemble(List<RenderedRegion> regions,
                           SourceSpanLedger ledger,
                           Pp2FormatOptions opts) {
        if (regions == null) {
            throw new NullPointerException("regions");
        }
        if (ledger == null) {
            throw new NullPointerException("ledger");
        }
        if (opts == null) {
            throw new NullPointerException("opts");
        }

        String source = ledger.getSource();

        if (regions.isEmpty()) {
            // All bytes are inter-region trivia; return source unchanged.
            // This happens before the comment-policy notice, so no notice is
            // logged for an empty list.
            return source;
        }

        if (opts.commentPolicy != CommentPolicy.PRESERVE) {
            PPLogger.info("RegionAssembler: commentPolicy="
                    + opts.commentPolicy
                    + " is not yet implemented in the Phase-2 MVP; "
                    + "falling back to PRESERVE (verbatim inter-region trivia)");
        }

        // The source length is only a capacity hint; rendered text may be
        // longer or shorter than the original.
        StringBuilder out = new StringBuilder(source.length());

        // Preamble: source bytes before the first region's first token.
        int firstStart = regions.get(0).getRange().getStartOffset();
        appendSourceSlice(out, source, 0, firstStart, "preamble");

        int regionCount = regions.size();
        for (int i = 0; i < regionCount; i++) {
            RenderedRegion region = regions.get(i);
            int rEnd = region.getRange().getEndOffset();

            // Emit the renderer's output for this region.
            // NOTE(review): assumes getText() never returns null — a null
            // reference appended to a StringBuilder would not be emitted as
            // empty text; confirm against RenderedRegion's contract.
            out.append(region.getText());

            if (i + 1 < regionCount) {
                int nextStart = regions.get(i + 1).getRange().getStartOffset();
                if (rEnd > nextStart) {
                    // Overlapping regions — the boundary detector should never
                    // produce this, but log and skip rather than double-emit.
                    PPLogger.info("RegionAssembler: region[" + i + "].endOffset="
                            + rEnd + " > region[" + (i + 1) + "].startOffset="
                            + nextStart + " — overlapping ranges; skipping gap");
                } else {
                    // Inter-region gap: whitespace, comments, blank lines, etc.
                    appendSourceSlice(out, source, rEnd, nextStart,
                            "gap[" + i + "->" + (i + 1) + "]");
                }
            }
        }

        // Trailer: source bytes after the last region's last token.
        int lastEnd = regions.get(regionCount - 1).getRange().getEndOffset();
        appendSourceSlice(out, source, lastEnd, source.length(), "trailer");

        return out.toString();
    }

    /**
     * Append {@code source[from..to)} to {@code out}.
     *
     * <p>An empty or inverted range ({@code from >= to}) is silently ignored.
     * A range that reaches outside the source ({@code from < 0} or
     * {@code to > source.length()}) is logged via {@link PPLogger#info} and
     * skipped entirely rather than throwing, so a single bad region cannot
     * abort the rest of the assembly.
     *
     * @param out    destination buffer
     * @param source original source text the slice is taken from
     * @param from   inclusive start index of the slice
     * @param to     exclusive end index of the slice
     * @param label  short description of the slice, used only in the log
     *               message
     */
    private static void appendSourceSlice(StringBuilder out,
                                          String source,
                                          int from, int to,
                                          String label) {
        if (from >= to) {
            return; // empty or zero-width slice
        }
        if (from < 0 || to > source.length()) {
            PPLogger.info("RegionAssembler: " + label + " source slice ["
                    + from + ".." + to + ") is out of bounds "
                    + "(sourceLen=" + source.length() + "); skipping");
            return;
        }
        out.append(source, from, to);
    }
}