001package gudusoft.gsqlparser.resolver2.inference;
002
003import gudusoft.gsqlparser.EExpressionType;
004import gudusoft.gsqlparser.nodes.*;
005import gudusoft.gsqlparser.stmt.TSelectSqlStatement;
006
007import java.util.ArrayList;
008import java.util.List;
009
010/**
011 * Collects evidence for column inference from SQL statements.
012 *
013 * <p>The collector traverses various parts of SQL statements to gather
014 * evidence about which columns belong to which tables. This is used
015 * by the InferenceEngine to make educated guesses about column sources
016 * when metadata is not available.
017 *
018 * <p>Evidence sources:
019 * - WHERE clause: qualified and unqualified column references
020 * - JOIN conditions: columns used in join predicates
021 * - SELECT list: columns and expressions
022 * - GROUP BY: grouped columns
023 * - ORDER BY: sort columns
024 * - INSERT statements: column lists
025 * - UPDATE statements: SET clauses
026 *
027 * <p>Example:
028 * <pre>
029 * SELECT * FROM employees e
030 * WHERE e.department_id = 10
031 *
032 * Evidence collected:
033 * - "department_id" in "employees" (from qualified WHERE reference)
034 * </pre>
035 */
036public class EvidenceCollector {
037
038    /** Collected evidence */
039    private final List<InferenceEvidence> evidence = new ArrayList<>();
040
041    /**
042     * Collect evidence from a SELECT statement.
043     *
044     * @param select the SELECT statement
045     * @return list of collected evidence
046     */
047    public List<InferenceEvidence> collectFromSelect(TSelectSqlStatement select) {
048        evidence.clear();
049
050        if (select == null) {
051            return new ArrayList<>(evidence);
052        }
053
054        // Collect from WHERE clause
055        if (select.getWhereClause() != null &&
056            select.getWhereClause().getCondition() != null) {
057            collectFromExpression(select.getWhereClause().getCondition());
058        }
059
060        // Collect from JOIN conditions
061        if (select.joins != null && select.joins.size() > 0) {
062            collectFromJoins(select.joins);
063        }
064
065        // Collect from SELECT list
066        if (select.getResultColumnList() != null) {
067            collectFromResultColumns(select.getResultColumnList());
068        }
069
070        // Collect from GROUP BY
071        if (select.getGroupByClause() != null &&
072            select.getGroupByClause().getItems() != null) {
073            collectFromGroupBy(select.getGroupByClause());
074        }
075
076        // Collect from HAVING
077        if (select.getGroupByClause() != null &&
078            select.getGroupByClause().getHavingClause() != null) {
079            collectFromExpression(select.getGroupByClause().getHavingClause());
080        }
081
082        // Collect from ORDER BY
083        if (select.getOrderbyClause() != null &&
084            select.getOrderbyClause().getItems() != null) {
085            collectFromOrderBy(select.getOrderbyClause());
086        }
087
088        return new ArrayList<>(evidence);
089    }
090
091    /**
092     * Collect evidence from an expression (WHERE, HAVING, etc.).
093     */
094    private void collectFromExpression(TExpression expr) {
095        if (expr == null) {
096            return;
097        }
098
099        // Get all column references in the expression
100        List<TObjectName> columns = expr.getColumnsInsideExpression();
101        if (columns == null) {
102            return;
103        }
104
105        for (TObjectName objName : columns) {
106            collectFromObjectName(objName);
107        }
108    }
109
110    /**
111     * Collect evidence from a TObjectName (column reference).
112     */
113    private void collectFromObjectName(TObjectName objName) {
114        if (objName == null) {
115            return;
116        }
117
118        String columnName = objName.getColumnNameOnly();
119        if (columnName == null || columnName.isEmpty()) {
120            return;
121        }
122
123        // Check if it's a qualified reference (table.column)
124        if (objName.getTableString() != null && !objName.getTableString().isEmpty()) {
125            String tableName = objName.getTableString();
126            evidence.add(InferenceEvidence.fromQualifiedReference(
127                columnName,
128                tableName,
129                objName
130            ));
131        } else {
132            // Unqualified reference - we can't determine table without context
133            // This would need to be handled by the resolver with scope information
134            // For now, we skip unqualified references in evidence collection
135        }
136    }
137
138    /**
139     * Collect evidence from JOIN conditions.
140     */
141    private void collectFromJoins(TJoinList joins) {
142        if (joins == null) {
143            return;
144        }
145
146        for (int i = 0; i < joins.size(); i++) {
147            TJoin join = joins.getJoin(i);
148            if (join == null) {
149                continue;
150            }
151
152            // Collect from join items
153            if (join.getJoinItems() != null) {
154                for (int j = 0; j < join.getJoinItems().size(); j++) {
155                    TJoinItem joinItem = join.getJoinItems().getJoinItem(j);
156                    if (joinItem == null) {
157                        continue;
158                    }
159
160                    // Collect from ON condition
161                    if (joinItem.getOnCondition() != null) {
162                        List<TObjectName> columns = joinItem.getOnCondition().getColumnsInsideExpression();
163                        if (columns != null) {
164                            for (TObjectName objName : columns) {
165                                if (objName.getTableString() != null && !objName.getTableString().isEmpty()) {
166                                    evidence.add(InferenceEvidence.fromJoinCondition(
167                                        objName.getColumnNameOnly(),
168                                        objName.getTableString(),
169                                        objName
170                                    ));
171                                }
172                            }
173                        }
174                    }
175                }
176            }
177        }
178    }
179
180    /**
181     * Collect evidence from SELECT list.
182     */
183    private void collectFromResultColumns(TResultColumnList resultColumns) {
184        if (resultColumns == null) {
185            return;
186        }
187
188        for (int i = 0; i < resultColumns.size(); i++) {
189            TResultColumn rc = resultColumns.getResultColumn(i);
190            if (rc == null || rc.getExpr() == null) {
191                continue;
192            }
193
194            // Skip star columns for now (handled separately)
195            if (rc.getExpr().getExpressionType() == EExpressionType.simple_object_name_t) {
196                TObjectName objName = rc.getExpr().getObjectOperand();
197                if (objName != null && !objName.toString().endsWith("*")) {
198                    collectFromObjectName(objName);
199                }
200            } else {
201                // Collect from complex expressions
202                collectFromExpression(rc.getExpr());
203            }
204        }
205    }
206
207    /**
208     * Collect evidence from GROUP BY clause.
209     */
210    private void collectFromGroupBy(TGroupBy groupBy) {
211        if (groupBy == null || groupBy.getItems() == null) {
212            return;
213        }
214
215        for (int i = 0; i < groupBy.getItems().size(); i++) {
216            TGroupByItem item = groupBy.getItems().getGroupByItem(i);
217            if (item != null && item.getExpr() != null) {
218                collectFromExpression(item.getExpr());
219            }
220        }
221    }
222
223    /**
224     * Collect evidence from ORDER BY clause.
225     */
226    private void collectFromOrderBy(TOrderBy orderBy) {
227        if (orderBy == null || orderBy.getItems() == null) {
228            return;
229        }
230
231        for (int i = 0; i < orderBy.getItems().size(); i++) {
232            TOrderByItem item = orderBy.getItems().getOrderByItem(i);
233            if (item != null && item.getSortKey() != null) {
234                collectFromExpression(item.getSortKey());
235            }
236        }
237    }
238
239    /**
240     * Get all collected evidence.
241     *
242     * @return list of evidence
243     */
244    public List<InferenceEvidence> getEvidence() {
245        return new ArrayList<>(evidence);
246    }
247
248    /**
249     * Clear all collected evidence.
250     */
251    public void clear() {
252        evidence.clear();
253    }
254
255    /**
256     * Get count of collected evidence.
257     *
258     * @return evidence count
259     */
260    public int getEvidenceCount() {
261        return evidence.size();
262    }
263}