001package gudusoft.gsqlparser.pp2.token; 002 003import gudusoft.gsqlparser.TSourceToken; 004 005/** 006 * Shared helpers for reasoning about a {@link TSourceToken}'s position 007 * coverage on the input. 008 * 009 * <p>Several pp2 stages walk a {@code TSourceTokenList} in offset order 010 * and need to skip tokens that overlap an already-emitted span. The 011 * pattern shows up in: 012 * <ul> 013 * <li>{@link Pp2TokenStreamBuilder} — folding whitespace, skipping 014 * phantom inner tokens emitted by GSP's lexer for shapes like 015 * {@code ${name}}.</li> 016 * <li>{@link SourceSpanLedger} — covering every byte exactly once.</li> 017 * <li>{@link TokenEquivalence} — building the comparable-token 018 * sequence.</li> 019 * <li>(planned) S11 statement-boundary detector — locating 020 * statement terminators inside a multi-region stream.</li> 021 * </ul> 022 * 023 * <p>Centralizing the logic here avoids the predicate drifting across 024 * call sites as Phase 2 lands. 025 * 026 * <p>Plan reference: §7.3/S9 (the GSP {@code ${name}} probe finding). 027 */ 028public final class TokenCoverage { 029 030 private TokenCoverage() { 031 // utility class 032 } 033 034 /** 035 * True when {@code token}'s source range is FULLY contained in an 036 * already-emitted span ending at {@code priorEnd}. 037 * 038 * <p>Fully-contained tokens are phantom inner tokens emitted by the 039 * GSP lexer (e.g., the {@code "{" "name" "}"} that follow an outer 040 * {@code "${name}"} identifier). Skipping them is safe because the 041 * outer token already carries the same bytes. 042 * 043 * <p>Partial overlap (start shadowed but end extending past) is NOT 044 * fully contained; callers must handle it separately (either throw 045 * loudly or continue processing — never silently drop, since that 046 * would lose bytes). 047 * 048 * @param token the candidate token; {@code null} is tolerated and 049 * returns {@code false} so callers can treat it as 050 * "not shadowed; handle null separately" 051 * @param priorEnd the source offset where the previously emitted 052 * span ends; pass {@code -1} when nothing has been 053 * emitted yet 054 * @return {@code true} iff {@code token} is fully shadowed 055 */ 056 public static boolean isFullyShadowed(TSourceToken token, long priorEnd) { 057 if (token == null) return false; 058 String text = token.toString(); 059 if (text == null || text.isEmpty()) { 060 // Empty tokens occupy no bytes; not "shadowed" per se. Caller 061 // typically filters them separately. 062 return false; 063 } 064 long start = token.offset; 065 if (start < 0 || start >= priorEnd) return false; 066 long end = start + text.length(); 067 return end <= priorEnd; 068 } 069 070 /** 071 * Convenience: compute the half-open end offset 072 * ({@code offset + text.length()}) for a token. Returns the 073 * token's offset when the text is null or empty. 074 */ 075 public static long endOffset(TSourceToken token) { 076 if (token == null) return -1L; 077 String text = token.toString(); 078 int len = text == null ? 0 : text.length(); 079 return token.offset + len; 080 } 081}