001package gudusoft.gsqlparser.resolver2.inference; 002 003import gudusoft.gsqlparser.EExpressionType; 004import gudusoft.gsqlparser.nodes.*; 005import gudusoft.gsqlparser.stmt.TSelectSqlStatement; 006 007import java.util.ArrayList; 008import java.util.List; 009 010/** 011 * Collects evidence for column inference from SQL statements. 012 * 013 * <p>The collector traverses various parts of SQL statements to gather 014 * evidence about which columns belong to which tables. This is used 015 * by the InferenceEngine to make educated guesses about column sources 016 * when metadata is not available. 017 * 018 * <p>Evidence sources: 019 * - WHERE clause: qualified and unqualified column references 020 * - JOIN conditions: columns used in join predicates 021 * - SELECT list: columns and expressions 022 * - GROUP BY: grouped columns 023 * - ORDER BY: sort columns 024 * - INSERT statements: column lists 025 * - UPDATE statements: SET clauses 026 * 027 * <p>Example: 028 * <pre> 029 * SELECT * FROM employees e 030 * WHERE e.department_id = 10 031 * 032 * Evidence collected: 033 * - "department_id" in "employees" (from qualified WHERE reference) 034 * </pre> 035 */ 036public class EvidenceCollector { 037 038 /** Collected evidence */ 039 private final List<InferenceEvidence> evidence = new ArrayList<>(); 040 041 /** 042 * Collect evidence from a SELECT statement. 043 * 044 * @param select the SELECT statement 045 * @return list of collected evidence 046 */ 047 public List<InferenceEvidence> collectFromSelect(TSelectSqlStatement select) { 048 evidence.clear(); 049 050 if (select == null) { 051 return new ArrayList<>(evidence); 052 } 053 054 // Collect from WHERE clause 055 if (select.getWhereClause() != null && 056 select.getWhereClause().getCondition() != null) { 057 collectFromExpression(select.getWhereClause().getCondition()); 058 } 059 060 // Collect from JOIN conditions 061 if (select.joins != null && select.joins.size() > 0) { 062 collectFromJoins(select.joins); 063 } 064 065 // Collect from SELECT list 066 if (select.getResultColumnList() != null) { 067 collectFromResultColumns(select.getResultColumnList()); 068 } 069 070 // Collect from GROUP BY 071 if (select.getGroupByClause() != null && 072 select.getGroupByClause().getItems() != null) { 073 collectFromGroupBy(select.getGroupByClause()); 074 } 075 076 // Collect from HAVING 077 if (select.getGroupByClause() != null && 078 select.getGroupByClause().getHavingClause() != null) { 079 collectFromExpression(select.getGroupByClause().getHavingClause()); 080 } 081 082 // Collect from ORDER BY 083 if (select.getOrderbyClause() != null && 084 select.getOrderbyClause().getItems() != null) { 085 collectFromOrderBy(select.getOrderbyClause()); 086 } 087 088 return new ArrayList<>(evidence); 089 } 090 091 /** 092 * Collect evidence from an expression (WHERE, HAVING, etc.). 093 */ 094 private void collectFromExpression(TExpression expr) { 095 if (expr == null) { 096 return; 097 } 098 099 // Get all column references in the expression 100 List<TObjectName> columns = expr.getColumnsInsideExpression(); 101 if (columns == null) { 102 return; 103 } 104 105 for (TObjectName objName : columns) { 106 collectFromObjectName(objName); 107 } 108 } 109 110 /** 111 * Collect evidence from a TObjectName (column reference). 112 */ 113 private void collectFromObjectName(TObjectName objName) { 114 if (objName == null) { 115 return; 116 } 117 118 String columnName = objName.getColumnNameOnly(); 119 if (columnName == null || columnName.isEmpty()) { 120 return; 121 } 122 123 // Check if it's a qualified reference (table.column) 124 if (objName.getTableString() != null && !objName.getTableString().isEmpty()) { 125 String tableName = objName.getTableString(); 126 evidence.add(InferenceEvidence.fromQualifiedReference( 127 columnName, 128 tableName, 129 objName 130 )); 131 } else { 132 // Unqualified reference - we can't determine table without context 133 // This would need to be handled by the resolver with scope information 134 // For now, we skip unqualified references in evidence collection 135 } 136 } 137 138 /** 139 * Collect evidence from JOIN conditions. 140 */ 141 private void collectFromJoins(TJoinList joins) { 142 if (joins == null) { 143 return; 144 } 145 146 for (int i = 0; i < joins.size(); i++) { 147 TJoin join = joins.getJoin(i); 148 if (join == null) { 149 continue; 150 } 151 152 // Collect from join items 153 if (join.getJoinItems() != null) { 154 for (int j = 0; j < join.getJoinItems().size(); j++) { 155 TJoinItem joinItem = join.getJoinItems().getJoinItem(j); 156 if (joinItem == null) { 157 continue; 158 } 159 160 // Collect from ON condition 161 if (joinItem.getOnCondition() != null) { 162 List<TObjectName> columns = joinItem.getOnCondition().getColumnsInsideExpression(); 163 if (columns != null) { 164 for (TObjectName objName : columns) { 165 if (objName.getTableString() != null && !objName.getTableString().isEmpty()) { 166 evidence.add(InferenceEvidence.fromJoinCondition( 167 objName.getColumnNameOnly(), 168 objName.getTableString(), 169 objName 170 )); 171 } 172 } 173 } 174 } 175 } 176 } 177 } 178 } 179 180 /** 181 * Collect evidence from SELECT list. 182 */ 183 private void collectFromResultColumns(TResultColumnList resultColumns) { 184 if (resultColumns == null) { 185 return; 186 } 187 188 for (int i = 0; i < resultColumns.size(); i++) { 189 TResultColumn rc = resultColumns.getResultColumn(i); 190 if (rc == null || rc.getExpr() == null) { 191 continue; 192 } 193 194 // Skip star columns for now (handled separately) 195 if (rc.getExpr().getExpressionType() == EExpressionType.simple_object_name_t) { 196 TObjectName objName = rc.getExpr().getObjectOperand(); 197 if (objName != null && !objName.toString().endsWith("*")) { 198 collectFromObjectName(objName); 199 } 200 } else { 201 // Collect from complex expressions 202 collectFromExpression(rc.getExpr()); 203 } 204 } 205 } 206 207 /** 208 * Collect evidence from GROUP BY clause. 209 */ 210 private void collectFromGroupBy(TGroupBy groupBy) { 211 if (groupBy == null || groupBy.getItems() == null) { 212 return; 213 } 214 215 for (int i = 0; i < groupBy.getItems().size(); i++) { 216 TGroupByItem item = groupBy.getItems().getGroupByItem(i); 217 if (item != null && item.getExpr() != null) { 218 collectFromExpression(item.getExpr()); 219 } 220 } 221 } 222 223 /** 224 * Collect evidence from ORDER BY clause. 225 */ 226 private void collectFromOrderBy(TOrderBy orderBy) { 227 if (orderBy == null || orderBy.getItems() == null) { 228 return; 229 } 230 231 for (int i = 0; i < orderBy.getItems().size(); i++) { 232 TOrderByItem item = orderBy.getItems().getOrderByItem(i); 233 if (item != null && item.getSortKey() != null) { 234 collectFromExpression(item.getSortKey()); 235 } 236 } 237 } 238 239 /** 240 * Get all collected evidence. 241 * 242 * @return list of evidence 243 */ 244 public List<InferenceEvidence> getEvidence() { 245 return new ArrayList<>(evidence); 246 } 247 248 /** 249 * Clear all collected evidence. 250 */ 251 public void clear() { 252 evidence.clear(); 253 } 254 255 /** 256 * Get count of collected evidence. 257 * 258 * @return evidence count 259 */ 260 public int getEvidenceCount() { 261 return evidence.size(); 262 } 263}