Source code

001package gudusoft.gsqlparser.pp2.token;
002
003import gudusoft.gsqlparser.TSourceToken;
004
005/**
006 * Shared helpers for reasoning about a {@link TSourceToken}'s position
007 * coverage on the input.
008 *
009 * <p>Several pp2 stages walk a {@code TSourceTokenList} in offset order
010 * and need to skip tokens that overlap an already-emitted span. The
011 * pattern shows up in:
012 * <ul>
013 *   <li>{@link Pp2TokenStreamBuilder} — folding whitespace, skipping
014 *       phantom inner tokens emitted by GSP's lexer for shapes like
015 *       {@code ${name}}.</li>
016 *   <li>{@link SourceSpanLedger} — covering every byte exactly once.</li>
017 *   <li>{@link TokenEquivalence} — building the comparable-token
018 *       sequence.</li>
019 *   <li>(planned) S11 statement-boundary detector — locating
020 *       statement terminators inside a multi-region stream.</li>
021 * </ul>
022 *
023 * <p>Centralizing the logic here avoids the predicate drifting across
024 * call sites as Phase 2 lands.
025 *
026 * <p>Plan reference: §7.3/S9 (the GSP {@code ${name}} probe finding).
027 */
028public final class TokenCoverage {
029
030    private TokenCoverage() {
031        // utility class
032    }
033
034    /**
035     * True when {@code token}'s source range is FULLY contained in an
036     * already-emitted span ending at {@code priorEnd}.
037     *
038     * <p>Fully-contained tokens are phantom inner tokens emitted by the
039     * GSP lexer (e.g., the {@code "{" "name" "}"} that follow an outer
040     * {@code "${name}"} identifier). Skipping them is safe because the
041     * outer token already carries the same bytes.
042     *
043     * <p>Partial overlap (start shadowed but end extending past) is NOT
044     * fully contained; callers must handle it separately (either throw
045     * loudly or continue processing — never silently drop, since that
046     * would lose bytes).
047     *
048     * @param token     the candidate token; {@code null} is tolerated and
049     *                  returns {@code false} so callers can treat it as
050     *                  "not shadowed; handle null separately"
051     * @param priorEnd  the source offset where the previously emitted
052     *                  span ends; pass {@code -1} when nothing has been
053     *                  emitted yet
054     * @return {@code true} iff {@code token} is fully shadowed
055     */
056    public static boolean isFullyShadowed(TSourceToken token, long priorEnd) {
057        if (token == null) return false;
058        String text = token.toString();
059        if (text == null || text.isEmpty()) {
060            // Empty tokens occupy no bytes; not "shadowed" per se. Caller
061            // typically filters them separately.
062            return false;
063        }
064        long start = token.offset;
065        if (start < 0 || start >= priorEnd) return false;
066        long end = start + text.length();
067        return end <= priorEnd;
068    }
069
070    /**
071     * Convenience: compute the half-open end offset
072     * ({@code offset + text.length()}) for a token. Returns the
073     * token's offset when the text is null or empty.
074     */
075    public static long endOffset(TSourceToken token) {
076        if (token == null) return -1L;
077        String text = token.toString();
078        int len = text == null ? 0 : text.length();
079        return token.offset + len;
080    }
081}