001package gudusoft.gsqlparser.demos.sqlguard; 002 003import gudusoft.gsqlparser.util.json.JSON; 004 005import java.util.ArrayList; 006import java.util.LinkedHashMap; 007import java.util.List; 008import java.util.Map; 009 010/** Response POJO and nested contract types for SQL Guard. */ 011public class SqlGuardResponse { 012 public boolean ok = true; 013 public String requestId; 014 public String decision = "ALLOW"; 015 public int riskScore = 0; 016 public String summary = "SQL allowed."; 017 public Facts facts = new Facts(); 018 public List<StatementResult> statements = new ArrayList<StatementResult>(); 019 public List<Violation> violations = new ArrayList<Violation>(); 020 public List<String> suggestions = new ArrayList<String>(); 021 public Map<String, Object> audit = new LinkedHashMap<String, Object>(); 022 public ErrorInfo error; 023 024 public String toJson() { return JSON.toJSONString(toMap()); } 025 026 public Map<String, Object> toMap() { 027 LinkedHashMap<String, Object> m = new LinkedHashMap<String, Object>(); 028 m.put("ok", ok); 029 if (requestId != null) m.put("requestId", requestId); 030 if (ok) { 031 m.put("decision", decision); m.put("riskScore", riskScore); m.put("summary", summary); 032 m.put("facts", facts.toMap()); m.put("statements", toStatementMaps()); 033 m.put("violations", toViolationMaps()); m.put("suggestions", suggestions); 034 } else if (error != null) m.put("error", error.toMap()); 035 m.put("audit", audit); 036 return m; 037 } 038 039 private List<Map<String,Object>> toStatementMaps(){ List<Map<String,Object>> l=new ArrayList<Map<String,Object>>(); for(StatementResult s:statements)l.add(s.toMap()); return l; } 040 private List<Map<String,Object>> toViolationMaps(){ List<Map<String,Object>> l=new ArrayList<Map<String,Object>>(); for(Violation v:violations)l.add(v.toMap()); return l; } 041 042 public static SqlGuardResponse error(String requestId, String code, String message) { 043 SqlGuardResponse r = new SqlGuardResponse(); r.ok = false; r.requestId = requestId; r.error = new ErrorInfo(); 044 r.error.code = code; r.error.message = message; r.audit.put("engine", "gsp-java"); return r; 045 } 046 047 public static class Facts { 048 public String dialect; 049 public List<TableFact> tables = new ArrayList<TableFact>(); 050 public List<ColumnFact> columns = new ArrayList<ColumnFact>(); 051 public List<JoinFact> joins = new ArrayList<JoinFact>(); 052 public List<Object> filters = new ArrayList<Object>(); 053 public List<AggregationFact> aggregations = new ArrayList<AggregationFact>(); 054 public List<LineageEdge> lineage = new ArrayList<LineageEdge>(); 055 public List<SetOperationFact> setOperations = new ArrayList<SetOperationFact>(); 056 public List<String> ctes = new ArrayList<String>(); 057 public List<OrderByFact> orderBy = new ArrayList<OrderByFact>(); 058 public List<String> diagnostics = new ArrayList<String>(); 059 Map<String,Object> toMap(){ 060 LinkedHashMap<String,Object> m = new LinkedHashMap<String,Object>(); 061 m.put("dialect", dialect); 062 m.put("tables", maps(tables)); 063 m.put("columns", maps(columns)); 064 m.put("joins", maps(joins)); 065 m.put("filters", filters); 066 m.put("aggregations", maps(aggregations)); 067 m.put("lineage", maps(lineage)); 068 m.put("setOperations", maps(setOperations)); 069 m.put("ctes", ctes); 070 m.put("orderBy", maps(orderBy)); 071 m.put("diagnostics", diagnostics); 072 return m; 073 } 074 } 075 interface Mappable { Map<String,Object> toMap(); } 076 static List<Map<String,Object>> maps(List<? extends Mappable> in){ List<Map<String,Object>> out=new ArrayList<Map<String,Object>>(); for(Mappable x:in)out.add(x.toMap()); return out; } 077 public static class TableFact implements Mappable { public String name; public String schema; public boolean catalogMatched; public Map<String,Object> toMap(){ LinkedHashMap<String,Object>m=new LinkedHashMap<String,Object>(); m.put("name",name); m.put("schema",schema); m.put("catalogMatched",catalogMatched); return m; }} 078 public static class ColumnFact implements Mappable { public String table; public String name; public List<String> policyTags=new ArrayList<String>(); public String exposure; public Map<String,Object> toMap(){ LinkedHashMap<String,Object>m=new LinkedHashMap<String,Object>(); m.put("table",table); m.put("name",name); m.put("policyTags",policyTags); m.put("exposure",exposure); return m; }} 079 080 /** 081 * One ON-predicate pair emitted by the {@code WRONG_JOIN_PATH} rule 082 * (blueprint §S24). Populated only when both sides bind to catalog 083 * columns. 084 * 085 * <ul> 086 * <li>{@code joinPath} — the equality the SQL wrote, in canonical 087 * form: {@code "left.table.col = right.table.col"} (lowercased, 088 * fully-qualified to disambiguate identical-name tables).</li> 089 * <li>{@code joinPathMatchesForeignKey} — {@code true} when the 090 * predicate's columns line up with a catalog FK between the 091 * two tables (in either direction). {@code false} otherwise.</li> 092 * <li>{@code expectedForeignKeyPath} — non-null only when an FK 093 * exists between the joined tables but the predicate doesn't 094 * match it. Carries the canonical "correct" predicate so the 095 * BFF can surface a Did-you-mean.</li> 096 * </ul> 097 * 098 * <p><b>Wire-format change in S24.</b> Pre-S24, {@code facts.joins} 099 * was always serialized as {@code []}. S24 introduces typed 100 * {@code JoinFact} entries; downstream consumers can no longer 101 * assume the array is empty. {@code joinPath} and 102 * {@code joinPathMatchesForeignKey} are always present on each 103 * entry; {@code expectedForeignKeyPath} is OMITTED from the JSON 104 * when null (i.e. for FK-matching joins and joins between tables 105 * with no FK relationship at all). Joins whose sides don't catalog- 106 * bind are NOT emitted (CTEs, subquery aliases, etc.).</p> 107 * 108 * <p><b>Single-column FK only (V1).</b> The matching logic uses 109 * {@link gudusoft.gsqlparser.catalog.input.model.ConstraintModel} 110 * entries whose {@code columns().size() == 1 && 111 * referencedColumns().size() == 1}. Composite-FK queries currently 112 * report {@code joinPathMatchesForeignKey:false} and do NOT fire 113 * WRONG_JOIN_PATH (because no single-column FK exists between the 114 * tables — the rule stays silent rather than misfire). Multi- 115 * column FK correlation is deferred to blueprint B8 (TODO #15).</p> 116 */ 117 public static class JoinFact implements Mappable { 118 public String joinPath; 119 public boolean joinPathMatchesForeignKey; 120 public String expectedForeignKeyPath; 121 public Map<String,Object> toMap() { 122 LinkedHashMap<String,Object> m = new LinkedHashMap<String,Object>(); 123 m.put("joinPath", joinPath); 124 m.put("joinPathMatchesForeignKey", joinPathMatchesForeignKey); 125 if (expectedForeignKeyPath != null) { 126 m.put("expectedForeignKeyPath", expectedForeignKeyPath); 127 } 128 return m; 129 } 130 } 131 132 /** 133 * One UNION / INTERSECT / EXCEPT / MINUS operator in the statement, 134 * populated by walking {@code TSelectSqlStatement.isCombinedQuery()} + 135 * {@code getLeftStmt()}/{@code getRightStmt()}. Blueprint §S14/§S15. 136 * 137 * <p>The {@code op} carries the canonical SQL token form 138 * ({@code "UNION"}, {@code "UNION ALL"}, {@code "INTERSECT"}, 139 * {@code "EXCEPT"}, {@code "EXCEPT ALL"}, {@code "MINUS"}). Multiple 140 * set-ops in one statement emit one entry per operator in left-to-right 141 * tree order. {@code statementIndex} pins each entry to the source 142 * statement so multi-statement requests stay disambiguated.</p> 143 */ 144 public static class SetOperationFact implements Mappable { 145 public int statementIndex; 146 public String op; 147 public Map<String,Object> toMap() { 148 LinkedHashMap<String,Object> m = new LinkedHashMap<String,Object>(); 149 m.put("statementIndex", statementIndex); 150 m.put("op", op); 151 return m; 152 } 153 } 154 155 /** 156 * One aggregation-level fact per SELECT, populated when the statement 157 * carries a HAVING clause. Blueprint §S16. The {@code having} text is 158 * round-tripped from {@code THavingClause.toString()} — dialect-faithful 159 * verbatim text (lowercased to match the existing convention) so a 160 * downstream BFF can echo "this query filters groups by …" without 161 * re-parsing. 162 * 163 * <p>Wire-format contract: empty list when the statement has no 164 * HAVING; one entry per statement that does. Currently single-field; 165 * future S16 work may add {@code groupBy} / {@code having.atoms[]} — 166 * the typed shape leaves room without breaking consumers.</p> 167 */ 168 public static class AggregationFact implements Mappable { 169 public int statementIndex; 170 public String having; 171 public Map<String,Object> toMap() { 172 LinkedHashMap<String,Object> m = new LinkedHashMap<String,Object>(); 173 m.put("statementIndex", statementIndex); 174 if (having != null) m.put("having", having); 175 return m; 176 } 177 } 178 public static class LineageEdge implements Mappable { 179 public int statementIndex; 180 public String targetColumn; 181 public List<String> sourceColumns = new ArrayList<String>(); 182 public String lineageType; 183 /** 184 * Aggregate function calls detected inside this edge's source 185 * expression, in left-to-right textual order, deduplicated. Each 186 * entry is the bare uppercase function name (e.g. {@code "COUNT"}, 187 * {@code "SUM"}, {@code "AVG"}) or {@code "FUNC(DISTINCT)"} when 188 * the call uses the DISTINCT modifier (e.g. {@code COUNT(DISTINCT)}). 189 * 190 * <p>Multi-aggregate expressions emit every call faithfully so 191 * downstream intent-mismatch checks (blueprint §S23) can reason 192 * about the actual aggregation grain: {@code COUNT(DISTINCT a) / 193 * COUNT(*)} → {@code ["COUNT(DISTINCT)", "COUNT"]}, not a 194 * collapsed scalar. A naive scalar field would record only the 195 * first match and silently lie about the rest of the expression. 196 * 197 * <p>Scope: scalar aggregate calls only. Window aggregates 198 * ({@code AVG(x) OVER (...)}) live on the separate 199 * {@link #windowFunctions} field; FILTER aggregates 200 * ({@code COUNT(x) FILTER (WHERE ...)}) are still deferred 201 * (MantisBT #4468). A statement that mixes scalar and window 202 * aggregates produces edges in both fields. 203 * 204 * <p>Wire-format contract: {@code null} for non-aggregate edges 205 * and OMITTED from the JSON map so unaffected edges round-trip 206 * byte-identical to the pre-S2 shape. 207 */ 208 public List<String> aggregateFunctions; 209 /** 210 * Window function calls detected inside this edge's source 211 * expression. Populated when any {@link gudusoft.gsqlparser.nodes.TFunctionCall} 212 * in the projection carries a non-null 213 * {@code getWindowSpecification()} / {@code getWindowDef()}. 214 * 215 * <p>Companion to {@link #aggregateFunctions} (scalar form) and 216 * activated by blueprint §1.5.B B1 / B2. When this list is 217 * non-empty, {@link #lineageType} is {@code "window"} and the 218 * {@link #sourceColumns} list is augmented with the columns 219 * referenced inside the window spec's {@code PARTITION BY} and 220 * {@code ORDER BY} — dlineage surfaces those as {@code fdr} edges 221 * with {@code clauseType="selectList"} / {@code "orderby"} on 222 * the function resultset; sqlguard promotes them into 223 * {@code sourceColumns} because they determine which rows the 224 * window aggregate reads. 225 * 226 * <p><b>Wire-format contract:</b> {@code null} for non-window 227 * edges and OMITTED from the JSON map; window edges add the 228 * {@code windowFunctions} key. {@link #lineageType} simultaneously 229 * shifts from {@code "expression"} (pre-B1) to {@code "window"} 230 * for any projection containing an {@code OVER (...)} call, which 231 * IS a wire-format change for downstream consumers that were 232 * previously seeing window aggregates classified as 233 * {@code "expression"}. 234 * 235 * <p><b>L3 escalation status (MantisBT #4467).</b> Populated 236 * AST-side via {@link gudusoft.gsqlparser.nodes.TFunctionCall#getWindowDef()} 237 * because dlineage does not surface window-spec metadata as 238 * typed fields. When #4467 lands, the visitor migrates to 239 * dlineage's {@code relationship.getWindowSpec()} and this 240 * wire field stays unchanged. 241 */ 242 public List<WindowFn> windowFunctions; 243 public Map<String,Object> toMap() { 244 LinkedHashMap<String,Object> m = new LinkedHashMap<String,Object>(); 245 m.put("statementIndex", statementIndex); 246 m.put("targetColumn", targetColumn); 247 m.put("sourceColumns", sourceColumns); 248 m.put("lineageType", lineageType); 249 if (aggregateFunctions != null) { 250 m.put("aggregateFunctions", aggregateFunctions); 251 } 252 if (windowFunctions != null) { 253 List<Map<String,Object>> wfs = new ArrayList<Map<String,Object>>(); 254 for (WindowFn wf : windowFunctions) { 255 wfs.add(wf.toMap()); 256 } 257 m.put("windowFunctions", wfs); 258 } 259 return m; 260 } 261 } 262 263 /** 264 * One window-function call found inside a {@link LineageEdge}'s 265 * source expression. Activated by blueprint §1.5.B B1 / B2. 266 * 267 * <ul> 268 * <li>{@code functionName} — uppercase function name 269 * (e.g. {@code "COUNT"}, {@code "LAG"}, {@code "ROW_NUMBER"}), 270 * or {@code "FUNC(DISTINCT)"} when the call uses the DISTINCT 271 * modifier. Matches the 272 * {@link LineageEdge#aggregateFunctions} convention.</li> 273 * <li>{@code partitionBy} — column names from {@code PARTITION BY} 274 * in textual order. Captured from the {@code TFunctionCall}'s 275 * {@code getWindowDef().getPartitionClause()}. Identifiers are 276 * lowercased per the project's vendor-aware folding so the 277 * wire shape matches the rest of {@code sourceColumns}; values 278 * are simple identifiers (no schema/table qualifier) because 279 * the partition syntax accepts bare expressions, not necessarily 280 * fully-qualified column refs.</li> 281 * <li>{@code orderBy} — entries from the window's {@code ORDER BY} 282 * clause as {@code {column, direction}} pairs, in textual 283 * order. {@code direction} is {@code "ASC"} / {@code "DESC"} / 284 * {@code "NONE"} when omitted in the SQL.</li> 285 * <li>{@code frame} — raw text of the frame specification when 286 * present (e.g. {@code "ROWS BETWEEN 1 PRECEDING AND CURRENT 287 * ROW"}), or {@code null} when no frame clause is given. We 288 * expose the source text rather than typed boundary nodes 289 * because frame syntax varies sharply across dialects; 290 * downstream consumers that need typed access should walk the 291 * AST. Omitted from JSON when {@code null}.</li> 292 * </ul> 293 * 294 * <p>Wire-format contract: {@code functionName}, {@code partitionBy}, 295 * {@code orderBy} are always present (possibly empty list); {@code 296 * frame} is OMITTED from the JSON map when {@code null}. 297 */ 298 public static class WindowFn implements Mappable { 299 public String functionName; 300 public List<String> partitionBy = new ArrayList<String>(); 301 public List<OrderByItem> orderBy = new ArrayList<OrderByItem>(); 302 public String frame; 303 public Map<String,Object> toMap() { 304 LinkedHashMap<String,Object> m = new LinkedHashMap<String,Object>(); 305 m.put("functionName", functionName); 306 m.put("partitionBy", partitionBy); 307 List<Map<String,Object>> obs = new ArrayList<Map<String,Object>>(); 308 for (OrderByItem ob : orderBy) { 309 obs.add(ob.toMap()); 310 } 311 m.put("orderBy", obs); 312 if (frame != null) { 313 m.put("frame", frame); 314 } 315 return m; 316 } 317 } 318 319 /** One ORDER BY entry inside a {@link WindowFn}. */ 320 public static class OrderByItem implements Mappable { 321 public String column; 322 public String direction; 323 public Map<String,Object> toMap() { 324 LinkedHashMap<String,Object> m = new LinkedHashMap<String,Object>(); 325 m.put("column", column); 326 m.put("direction", direction); 327 return m; 328 } 329 } 330 331 /** 332 * One top-level ORDER BY entry in a SELECT (blueprint §S12). Carries 333 * {@code statementIndex} so multi-statement requests stay disambiguated; 334 * this is the structural difference from {@link OrderByItem}, which is 335 * the per-window form embedded inside {@link WindowFn} where the 336 * statement context lives on the parent {@link LineageEdge}. 337 */ 338 public static class OrderByFact implements Mappable { 339 public int statementIndex; 340 public String column; 341 public String direction; 342 public Map<String,Object> toMap() { 343 LinkedHashMap<String,Object> m = new LinkedHashMap<String,Object>(); 344 m.put("statementIndex", statementIndex); 345 m.put("column", column); 346 m.put("direction", direction); 347 return m; 348 } 349 } 350 public static class StatementResult { public int index; public String sqlType; public String decision="ALLOW"; public int riskScore; public String summary="SQL allowed."; public int lineageCount; public Map<String,Object> toMap(){ LinkedHashMap<String,Object>m=new LinkedHashMap<String,Object>(); m.put("index",index); m.put("sqlType",sqlType); m.put("decision",decision); m.put("riskScore",riskScore); m.put("summary",summary); m.put("lineageCount",lineageCount); return m; }} 351 public static class Violation { public int statementIndex; public String code; public String severity; public String message; public String evidence; public Map<String,Object> toMap(){ LinkedHashMap<String,Object>m=new LinkedHashMap<String,Object>(); m.put("statementIndex",statementIndex); m.put("code",code); m.put("severity",severity); m.put("message",message); m.put("evidence",evidence); return m; }} 352 public static class ErrorInfo { public String code; public String message; public Map<String,Object> toMap(){ LinkedHashMap<String,Object>m=new LinkedHashMap<String,Object>(); m.put("code",code); m.put("message",message); return m; }} 353}