001package gudusoft.gsqlparser.pp2.region;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.TGSqlParser;
005
006/**
007 * Size-1 pool that owns a single {@link TGSqlParser} per {@link EDbVendor}.
008 *
009 * <p>Allocating a new {@code TGSqlParser} is non-trivial (vendor parser
010 * factory wiring, lexer state, license initialisation). For a script with N
011 * statement regions, the engine wants to perform up to N parse attempts
012 * without paying allocation cost per region. {@code ParserPool} hands out
013 * the same instance every time after a lazy first allocation, and provides
014 * an explicit {@link #reset()} for the engine to call between regions.
015 *
016 * <p>"Size-1" is the plan's mandate (§5.3, §7.3/S12). The pool is therefore
017 * <b>single-threaded by construction</b> — a single {@code ParseRecoveryEngine}
018 * must never call {@link #borrow()} or {@link #reset()} from more than one
019 * thread concurrently. No locking is provided; concurrent use is undefined.
020 *
021 * <h2>Reset semantics</h2>
022 *
 * <p>{@link TGSqlParser#prepareForReuse()} (available since GSP 3.2.0.0)
 * clears the cached vendor parser, the SQL environment, the source filename,
 * the statement list, and the syntax-error list, and resets parsing options
 * to their defaults. The pool's {@link #reset()} method deliberately does
 * <b>not</b> invoke {@code prepareForReuse()}: dropping the cached vendor
 * parser would force its parse tables to be reloaded for every region.
 * Instead, {@code reset()} calls only
 * {@link TGSqlParser#setSqltext(String) setSqltext("")}, which atomically
 * clears {@code sqltext}, {@code sqlfilename}, and the cached
 * {@code sqlInputStream}. {@code prepareForReuse} as of 3.2.x does not
 * touch {@code sqlInputStream} in any case, so the {@code setSqltext("")}
 * step is what prevents a prior file-based parse's stream from leaking into
 * the next call. The statement list and the syntax-error list are rebuilt
 * and cleared by the parser itself at the start of every parse.
033 *
034 * <p>Note: {@code setSqlInputStream(null)} would <i>throw</i> because of an
035 * unconditional {@code new BufferedInputStream(input)} on its else branch.
036 * The {@code setSqltext("")} path is the only safe way to null out the
037 * stream field from public API.
038 *
039 * <p>Reset is idempotent and safe to call before the first {@link #borrow()}.
040 *
041 * <p>Plan reference: §5.3, §7.3/S12, §7.4/S12, §13/R1.
042 */
043public final class ParserPool {
044
045    private final EDbVendor vendor;
046    private TGSqlParser parser;     // lazily allocated by borrow()
047    private long borrowCount;       // for tests / observability
048    private long resetCount;
049
050    public ParserPool(EDbVendor vendor) {
051        if (vendor == null) throw new NullPointerException("vendor");
052        this.vendor = vendor;
053    }
054
055    public EDbVendor getVendor() { return vendor; }
056
057    /**
058     * Return the pooled parser, allocating it on the first call. Increments
059     * {@link #getBorrowCount()}.
060     */
061    public TGSqlParser borrow() {
062        if (parser == null) {
063            parser = new TGSqlParser(vendor);
064        }
065        borrowCount++;
066        return parser;
067    }
068
069    /**
070     * Clear stale parser state so the next {@link #borrow()} sees a clean
071     * baseline. Safe to call before the first borrow (no-op). Increments
072     * {@link #getResetCount()} regardless.
073     *
074     * <h3>Why this does NOT call {@code prepareForReuse()} (performance)</h3>
075     *
076     * <p>{@link TGSqlParser#prepareForReuse()} nulls the cached
077     * <i>vendor parser</i>. The vendor parser owns the YACC/LEX parse tables —
078     * large {@code int[]}/{@code long[]} arrays (~100KB+ per dialect). Nulling
079     * it forces {@code parse()} to re-create the vendor parser and reload those
080     * tables on <i>every</i> region. On a script with hundreds of statement
081     * regions that is hundreds of full table reloads — measured at ~351MB of
082     * {@code int[]} churn for an 18KB / 534-region invalid script, which pushes
083     * the formatter into GC thrash (and OOM-to-FAILED) under a constrained heap
084     * (-Xmx512m). {@code getVendorParser()} is explicitly designed to cache and
085     * reuse the vendor parser across {@code parse()} calls, so reusing it
086     * between regions is the supported path, not a hack. (Root-caused in S36;
087     * gates S37. plan §13/R14, R16.)
088     *
089     * <p>A lighter reset is sufficient because {@code doparse()} already
090     * rebuilds {@code sourcetokenlist} and {@code sqlstatements} and clears
091     * {@code syntaxErrors} at the start of every parse. We only need to clear
092     * the input fields here: {@code setSqltext("")} atomically clears
093     * {@code sqltext}, {@code sqlfilename}, AND the cached {@code sqlInputStream}
094     * (calling {@code setSqlInputStream(null)} would throw, so {@code setSqltext}
095     * is the only safe way to null the stream field from public API).
096     */
097    public void reset() {
098        resetCount++;
099        if (parser == null) return;
100        try {
101            // Clears sqltext + sqlfilename + cached input stream. Keeps the
102            // cached vendor parser (and its parse tables) so the next parse
103            // does not reload them. parse() rebuilds the statement list and
104            // clears syntax errors itself, so no stale parse output bleeds.
105            parser.setSqltext("");
106        } catch (Throwable t) {
107            // setSqltext is not expected to throw; if it ever does, drop the
108            // parser so the next borrow() rebuilds a clean one. Reset stays
109            // non-throwing.
110            parser = null;
111        }
112    }
113
114    /** Number of times {@link #borrow()} has been called. */
115    public long getBorrowCount() { return borrowCount; }
116
117    /** Number of times {@link #reset()} has been called. */
118    public long getResetCount() { return resetCount; }
119
120    /**
121     * True when the pool has allocated its parser. Visible for tests that
122     * want to assert lazy allocation behaviour.
123     */
124    public boolean isAllocated() { return parser != null; }
125}