package gudusoft.gsqlparser.pp2.overlay;

import gudusoft.gsqlparser.ESetOperatorType;
import gudusoft.gsqlparser.TCustomSqlStatement;
import gudusoft.gsqlparser.TSourceToken;
import gudusoft.gsqlparser.nodes.TParseTreeNode;
import gudusoft.gsqlparser.nodes.TResultColumn;
import gudusoft.gsqlparser.nodes.TResultColumnList;
import gudusoft.gsqlparser.nodes.TTable;
import gudusoft.gsqlparser.nodes.TTableList;
import gudusoft.gsqlparser.nodes.TWhereClause;
import gudusoft.gsqlparser.pp2.token.Pp2Token;
import gudusoft.gsqlparser.pp2.token.Pp2TokenStream;
import gudusoft.gsqlparser.pp2.token.TokenRole;
import gudusoft.gsqlparser.stmt.TSelectSqlStatement;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.TreeMap;

/**
 * AST overlay annotator — the v3 evolution bridge (plan §5.4, §7.3/S33).
 *
 * <p>Given a parsed {@link TCustomSqlStatement}, this stage writes
 * AST-derived {@link TokenRole}s ({@code AST_SELECT_LIST_ITEM},
 * {@code AST_TABLE_REF}, {@code AST_WHERE_CONDITION}) onto the corresponding
 * {@link Pp2Token}s of a {@link Pp2TokenStream}. Only the
 * {@code Pp2Token.roles} side-channel is touched; the wrapped
 * {@link TSourceToken} is read but never modified (forbidden #6 explicitly
 * permits designated annotator stages to write roles).
 *
 * <p><b>Feature flag.</b> In v2 the annotator is <b>off by default</b>
 * ({@code Pp2FormatOptions.astOverlayEnabled == false}). With the flag on,
 * {@code Pp2Engine} runs it for every cleanly-parsed (AST_OK) region. Nothing
 * in v2 renders from these roles yet — that is the job of v3's
 * {@code AstOverlayRenderer}. Shipping the annotation plumbing now keeps v3
 * down to a flag flip plus a new renderer instead of a re-architecture.
 *
 * <h2>Offset-alignment contract</h2>
 *
 * <p>The {@code stream} handed to {@link #annotate} MUST originate from the
 * <i>same parse</i> (same {@code TSourceTokenList}) as the AST of
 * {@code stmt}; otherwise the start/end token offsets of AST nodes will not
 * line up with {@code TSourceToken.offset} of the stream tokens.
 * {@code Pp2Engine} ensures this by building the region stream from the
 * region parser's own token list, and the standalone tests build the stream
 * from the same {@code TGSqlParser} that produced the statement.
 *
 * <h2>Iterative traversal (CLAUDE.md mandate, plan R18)</h2>
 *
 * <p>The left-recursive grammar turns UNION / INTERSECT / EXCEPT chains into
 * a deep, left-leaning {@code leftStmt}/{@code rightStmt} tree, and a
 * recursive walk would hit {@code StackOverflowError} on 2000-deep set
 * operations. The tree is therefore walked with an explicit
 * {@link ArrayDeque}. Inside each leaf SELECT the {@link TResultColumn} list
 * and the {@link TTable} list are bounded in width (not depth), so plain
 * iteration over them is safe. The WHERE expression tree is never descended:
 * only the flat token span of the {@link TWhereClause} node is marked, so
 * deep AND/OR trees cannot cause recursion here either.
 */
public final class AstOverlayAnnotator {

    /**
     * Same-parse convenience overload: stamps AST-derived roles from
     * {@code stmt} onto {@code stream} with an offset adjustment of 0.
     *
     * @param stmt   parsed statement root; {@code null} is a no-op returning 0
     * @param stream token stream built from the same parse; {@code null} is a
     *               no-op returning 0
     * @return how many {@code (token, role)} annotations were applied (a
     *         token that receives two different roles is counted twice);
     *         handy for tests and for a v3 caller that wants to know whether
     *         the overlay produced anything
     */
    public int annotate(TCustomSqlStatement stmt, Pp2TokenStream stream) {
        return annotate(stmt, stream, 0L);
    }

    /**
     * Stamps AST-derived roles from {@code stmt} onto {@code stream}, shifting
     * every AST node span by a fixed {@code offsetAdjustment} before it is
     * matched against the absolute offsets of the stream tokens.
     *
     * <p>This is the engine wiring seam. {@code Pp2Engine} parses each region
     * from {@code originalSql.substring(range.getStartOffset(), ...)}, which
     * makes the region AST's token offsets relative to that substring.
     * Supplying {@code offsetAdjustment = range.getStartOffset()} maps them
     * onto the engine's whole-script stream so the roles end up on exactly
     * the tokens the renderers see. (The same-parse overload passes 0.)
     *
     * @param stmt             parsed statement root; {@code null} is a no-op
     * @param stream           target stream carrying absolute offsets;
     *                         {@code null} or empty is a no-op
     * @param offsetAdjustment value added to each AST node's start/end offset
     *                         before matching against {@code stream} offsets
     * @return number of {@code (token, role)} annotations applied
     */
    public int annotate(TCustomSqlStatement stmt, Pp2TokenStream stream,
                        long offsetAdjustment) {
        if (stmt == null || stream == null || stream.isEmpty()) {
            return 0;
        }

        final TreeMap<Long, Pp2Token> tokensByOffset = indexByOffset(stream);
        if (tokensByOffset.isEmpty()) {
            return 0;
        }

        int total = 0;

        // Explicit work stack instead of recursion: the set-operator tree can
        // be extremely deep. ArrayDeque rejects null elements and only
        // non-null statements are ever pushed, so pop() never yields null.
        final Deque<TCustomSqlStatement> pending =
                new ArrayDeque<TCustomSqlStatement>();
        pending.push(stmt);

        while (!pending.isEmpty()) {
            final TCustomSqlStatement node = pending.pop();
            final TSelectSqlStatement select = (node instanceof TSelectSqlStatement)
                    ? (TSelectSqlStatement) node
                    : null;

            if (select != null) {
                if (select.getSetOperatorType() != ESetOperatorType.none) {
                    // Combined query: it is not annotated itself; its
                    // operands are scheduled instead. Right goes on first so
                    // the left operand is popped (and processed) first.
                    final TCustomSqlStatement right = select.getRightStmt();
                    if (right != null) {
                        pending.push(right);
                    }
                    final TCustomSqlStatement left = select.getLeftStmt();
                    if (left != null) {
                        pending.push(left);
                    }
                    continue;
                }
                final TResultColumnList columns = select.getResultColumnList();
                total += stampEach(columns, tokensByOffset,
                        TokenRole.AST_SELECT_LIST_ITEM, offsetAdjustment);
            }

            // Tables and WHERE are exposed by the base TCustomSqlStatement,
            // so every leaf statement kind (SELECT, UPDATE, DELETE...) gets
            // them annotated.
            final TTableList tables = node.getTables();
            total += stampEach(tables, tokensByOffset,
                    TokenRole.AST_TABLE_REF, offsetAdjustment);

            // Span only — the (possibly 2000-deep) AND/OR tree under the
            // WHERE clause is never descended. A missing clause is handled by
            // stampSpan's null check.
            final TWhereClause where = node.getWhereClause();
            total += stampSpan(where, tokensByOffset,
                    TokenRole.AST_WHERE_CONDITION, offsetAdjustment);
        }
        return total;
    }

    /**
     * Builds a sorted index from source offset to stream token so each AST
     * node span can later be resolved with a single range scan.
     *
     * <p>Stream tokens without a source token are skipped. If two tokens
     * report the same offset, the one encountered first is kept (this should
     * not happen for solid tokens coming from one parse).
     */
    private TreeMap<Long, Pp2Token> indexByOffset(Pp2TokenStream stream) {
        final TreeMap<Long, Pp2Token> index = new TreeMap<Long, Pp2Token>();
        for (Pp2Token token : stream) {
            final TSourceToken source = token.getSourceToken();
            if (source == null || index.containsKey(source.offset)) {
                continue;
            }
            index.put(source.offset, token);
        }
        return index;
    }

    /**
     * Applies {@link #stampSpan} to every node of {@code nodes} and sums the
     * results. A {@code null} list contributes 0.
     */
    private int stampEach(Iterable<? extends TParseTreeNode> nodes,
                          TreeMap<Long, Pp2Token> tokensByOffset,
                          TokenRole role, long shift) {
        if (nodes == null) {
            return 0;
        }
        int stamped = 0;
        for (TParseTreeNode element : nodes) {
            stamped += stampSpan(element, tokensByOffset, role, shift);
        }
        return stamped;
    }

    /**
     * Adds {@code role} to every indexed token whose source offset lies in
     * the inclusive range delimited by {@code node}'s start and end tokens,
     * after both bounds have been shifted by {@code shift}.
     *
     * @return the number of tokens that newly received {@code role}; tokens
     *         that already carried it are not counted again. Returns 0 when
     *         {@code node} or either of its boundary tokens is {@code null}.
     */
    private int stampSpan(TParseTreeNode node, TreeMap<Long, Pp2Token> tokensByOffset,
                          TokenRole role, long shift) {
        if (node == null) {
            return 0;
        }
        final TSourceToken first = node.getStartToken();
        final TSourceToken last = node.getEndToken();
        if (first == null || last == null) {
            return 0;
        }

        // Order the raw offsets first, then shift, so the bounds are correct
        // even if the start token happens to sit after the end token.
        final long a = first.offset;
        final long b = last.offset;
        final long from = (a <= b ? a : b) + shift;
        final long to = (a <= b ? b : a) + shift;

        int stamped = 0;
        for (Pp2Token token : tokensByOffset.subMap(from, true, to, true).values()) {
            if (token.hasRole(role)) {
                continue;
            }
            token.addRole(role);
            stamped++;
        }
        return stamped;
    }
}