001package gudusoft.gsqlparser.pp2.region; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.TGSqlParser; 005 006/** 007 * Size-1 pool that owns a single {@link TGSqlParser} per {@link EDbVendor}. 008 * 009 * <p>Allocating a new {@code TGSqlParser} is non-trivial (vendor parser 010 * factory wiring, lexer state, license initialisation). For a script with N 011 * statement regions, the engine wants to perform up to N parse attempts 012 * without paying allocation cost per region. {@code ParserPool} hands out 013 * the same instance every time after a lazy first allocation, and provides 014 * an explicit {@link #reset()} for the engine to call between regions. 015 * 016 * <p>"Size-1" is the plan's mandate (§5.3, §7.3/S12). The pool is therefore 017 * <b>single-threaded by construction</b> — a single {@code ParseRecoveryEngine} 018 * must never call {@link #borrow()} or {@link #reset()} from more than one 019 * thread concurrently. No locking is provided; concurrent use is undefined. 020 * 021 * <h2>Reset semantics</h2> 022 * 023 * <p>{@link TGSqlParser#prepareForReuse()} (available since GSP 3.2.0.0) 024 * clears cached vendor parser, the SQL environment, the source filename, the 025 * statement list, and the syntax-error list, and resets parsing options to 026 * their defaults. The pool's {@link #reset()} method invokes 027 * {@code prepareForReuse()} and additionally calls 028 * {@link TGSqlParser#setSqltext(String) setSqltext("")} which atomically 029 * clears {@code sqltext}, {@code sqlfilename}, and the cached 030 * {@code sqlInputStream}. {@code prepareForReuse} as of 3.2.x does not 031 * touch {@code sqlInputStream}, so this extra step is required to avoid a 032 * prior file-based parse's stream leaking into the next call. 033 * 034 * <p>Note: {@code setSqlInputStream(null)} would <i>throw</i> because of an 035 * unconditional {@code new BufferedInputStream(input)} on its else branch. 036 * The {@code setSqltext("")} path is the only safe way to null out the 037 * stream field from public API. 038 * 039 * <p>Reset is idempotent and safe to call before the first {@link #borrow()}. 040 * 041 * <p>Plan reference: §5.3, §7.3/S12, §7.4/S12, §13/R1. 042 */ 043public final class ParserPool { 044 045 private final EDbVendor vendor; 046 private TGSqlParser parser; // lazily allocated by borrow() 047 private long borrowCount; // for tests / observability 048 private long resetCount; 049 050 public ParserPool(EDbVendor vendor) { 051 if (vendor == null) throw new NullPointerException("vendor"); 052 this.vendor = vendor; 053 } 054 055 public EDbVendor getVendor() { return vendor; } 056 057 /** 058 * Return the pooled parser, allocating it on the first call. Increments 059 * {@link #getBorrowCount()}. 060 */ 061 public TGSqlParser borrow() { 062 if (parser == null) { 063 parser = new TGSqlParser(vendor); 064 } 065 borrowCount++; 066 return parser; 067 } 068 069 /** 070 * Clear stale parser state so the next {@link #borrow()} sees a clean 071 * baseline. Safe to call before the first borrow (no-op). Increments 072 * {@link #getResetCount()} regardless. 073 * 074 * <h3>Why this does NOT call {@code prepareForReuse()} (performance)</h3> 075 * 076 * <p>{@link TGSqlParser#prepareForReuse()} nulls the cached 077 * <i>vendor parser</i>. The vendor parser owns the YACC/LEX parse tables — 078 * large {@code int[]}/{@code long[]} arrays (~100KB+ per dialect). Nulling 079 * it forces {@code parse()} to re-create the vendor parser and reload those 080 * tables on <i>every</i> region. On a script with hundreds of statement 081 * regions that is hundreds of full table reloads — measured at ~351MB of 082 * {@code int[]} churn for an 18KB / 534-region invalid script, which pushes 083 * the formatter into GC thrash (and OOM-to-FAILED) under a constrained heap 084 * (-Xmx512m). {@code getVendorParser()} is explicitly designed to cache and 085 * reuse the vendor parser across {@code parse()} calls, so reusing it 086 * between regions is the supported path, not a hack. (Root-caused in S36; 087 * gates S37. plan §13/R14, R16.) 088 * 089 * <p>A lighter reset is sufficient because {@code doparse()} already 090 * rebuilds {@code sourcetokenlist} and {@code sqlstatements} and clears 091 * {@code syntaxErrors} at the start of every parse. We only need to clear 092 * the input fields here: {@code setSqltext("")} atomically clears 093 * {@code sqltext}, {@code sqlfilename}, AND the cached {@code sqlInputStream} 094 * (calling {@code setSqlInputStream(null)} would throw, so {@code setSqltext} 095 * is the only safe way to null the stream field from public API). 096 */ 097 public void reset() { 098 resetCount++; 099 if (parser == null) return; 100 try { 101 // Clears sqltext + sqlfilename + cached input stream. Keeps the 102 // cached vendor parser (and its parse tables) so the next parse 103 // does not reload them. parse() rebuilds the statement list and 104 // clears syntax errors itself, so no stale parse output bleeds. 105 parser.setSqltext(""); 106 } catch (Throwable t) { 107 // setSqltext is not expected to throw; if it ever does, drop the 108 // parser so the next borrow() rebuilds a clean one. Reset stays 109 // non-throwing. 110 parser = null; 111 } 112 } 113 114 /** Number of times {@link #borrow()} has been called. */ 115 public long getBorrowCount() { return borrowCount; } 116 117 /** Number of times {@link #reset()} has been called. */ 118 public long getResetCount() { return resetCount; } 119 120 /** 121 * True when the pool has allocated its parser. Visible for tests that 122 * want to assert lazy allocation behaviour. 123 */ 124 public boolean isAllocated() { return parser != null; } 125}