001package gudusoft.gsqlparser.resolver2.inference;
002
003import gudusoft.gsqlparser.nodes.TTable;
004import gudusoft.gsqlparser.resolver2.model.ColumnSource;
005
006import java.util.*;
007
008/**
009 * Engine for inferring column-to-table relationships without metadata.
010 *
011 * <p>The inference engine collects evidence from various sources in the SQL
012 * statement and uses it to infer which columns belong to which tables.
013 * This is particularly useful when:
014 * - Database metadata is not available
015 * - Dealing with SELECT * without schema information
016 * - Analyzing SQL from unknown sources
017 *
018 * <p>Inference process:
019 * 1. Collect evidence from SQL statement (WHERE, JOIN, SELECT, etc.)
020 * 2. Aggregate evidence by table and column
021 * 3. Calculate confidence scores
022 * 4. Generate inferred column sources
023 *
024 * <p>Example:
025 * <pre>
026 * SELECT * FROM employees e
027 * WHERE e.department_id = 10
028 *   AND e.salary > 50000
029 *
030 * Inference:
031 * - "department_id" column exists in "employees" (confidence: 0.95)
032 * - "salary" column exists in "employees" (confidence: 0.95)
033 * </pre>
034 */
035public class InferenceEngine {
036
037    /** Evidence collected for inference */
038    private final List<InferenceEvidence> evidenceList = new ArrayList<>();
039
040    /** Inferred columns by table */
041    private final Map<String, Set<String>> inferredColumnsByTable = new HashMap<>();
042
043    /** Evidence aggregated by table.column */
044    private final Map<String, List<InferenceEvidence>> evidenceByColumn = new HashMap<>();
045
046    /**
047     * Add a piece of evidence for inference.
048     *
049     * @param evidence the evidence to add
050     */
051    public void addEvidence(InferenceEvidence evidence) {
052        if (evidence == null) {
053            return;
054        }
055
056        evidenceList.add(evidence);
057
058        // Index by table.column
059        String key = makeKey(evidence.getTableName(), evidence.getColumnName());
060        evidenceByColumn.computeIfAbsent(key, k -> new ArrayList<>()).add(evidence);
061
062        // Track inferred columns
063        inferredColumnsByTable
064            .computeIfAbsent(evidence.getTableName(), k -> new HashSet<>())
065            .add(evidence.getColumnName());
066    }
067
068    /**
069     * Add multiple pieces of evidence.
070     *
071     * @param evidences the evidence to add
072     */
073    public void addAllEvidence(Collection<InferenceEvidence> evidences) {
074        if (evidences != null) {
075            for (InferenceEvidence evidence : evidences) {
076                addEvidence(evidence);
077            }
078        }
079    }
080
081    /**
082     * Get all inferred columns for a table.
083     *
084     * @param tableName the table name
085     * @return set of inferred column names, or empty set if none
086     */
087    public Set<String> getInferredColumns(String tableName) {
088        Set<String> columns = inferredColumnsByTable.get(tableName);
089        return columns != null ? new HashSet<>(columns) : Collections.emptySet();
090    }
091
092    /**
093     * Get all evidence for a specific table.column.
094     *
095     * @param tableName the table name
096     * @param columnName the column name
097     * @return list of evidence, or empty list if none
098     */
099    public List<InferenceEvidence> getEvidence(String tableName, String columnName) {
100        String key = makeKey(tableName, columnName);
101        List<InferenceEvidence> evidence = evidenceByColumn.get(key);
102        return evidence != null ? new ArrayList<>(evidence) : Collections.emptyList();
103    }
104
105    /**
106     * Calculate the combined confidence for a table.column based on all evidence.
107     *
108     * <p>Combines multiple pieces of evidence using formula:
109     * <pre>
110     * combined = 1 - ∏(1 - conf_i)
111     * </pre>
112     *
113     * This means:
114     * - Multiple pieces of evidence increase confidence
115     * - Evidence is independent (multiplicative combination)
116     * - Result is always in [0, 1]
117     *
118     * @param tableName the table name
119     * @param columnName the column name
120     * @return combined confidence [0.0, 1.0], or 0.0 if no evidence
121     */
122    public double calculateConfidence(String tableName, String columnName) {
123        List<InferenceEvidence> evidence = getEvidence(tableName, columnName);
124
125        if (evidence.isEmpty()) {
126            return 0.0;
127        }
128
129        // Combine confidence using complementary probability
130        double complementaryProduct = 1.0;
131        for (InferenceEvidence ev : evidence) {
132            complementaryProduct *= (1.0 - ev.getConfidence());
133        }
134
135        return 1.0 - complementaryProduct;
136    }
137
138    /**
139     * Create an inferred ColumnSource for a table.column.
140     *
141     * @param tableName the table name
142     * @param columnName the column name
143     * @param table the TTable object (may be null if not available)
144     * @return ColumnSource with inferred confidence, or null if no evidence
145     */
146    public ColumnSource createInferredColumnSource(
147            String tableName,
148            String columnName,
149            TTable table) {
150
151        List<InferenceEvidence> evidence = getEvidence(tableName, columnName);
152        if (evidence.isEmpty()) {
153            return null;
154        }
155
156        double confidence = calculateConfidence(tableName, columnName);
157
158        // Build evidence description
159        StringBuilder evidenceDesc = new StringBuilder();
160        evidenceDesc.append("inferred from: ");
161        for (int i = 0; i < evidence.size(); i++) {
162            if (i > 0) evidenceDesc.append(", ");
163            evidenceDesc.append(evidence.get(i).getEvidenceType());
164        }
165
166        // Create a namespace-less ColumnSource (we don't have actual metadata)
167        // This is a marker that indicates the column was inferred
168        return new ColumnSource(
169            null,  // namespace not available for inferred columns
170            columnName,
171            evidence.get(0).getSourceNode(),  // Use first evidence as source
172            confidence,
173            evidenceDesc.toString()
174        ) {
175            // Override to return the table we inferred for
176            @Override
177            public TTable getFinalTable() {
178                return table;
179            }
180        };
181    }
182
183    /**
184     * Get all tables that have inferred columns.
185     *
186     * @return set of table names with inferred columns
187     */
188    public Set<String> getTablesWithInferences() {
189        return new HashSet<>(inferredColumnsByTable.keySet());
190    }
191
192    /**
193     * Get total number of pieces of evidence collected.
194     *
195     * @return evidence count
196     */
197    public int getEvidenceCount() {
198        return evidenceList.size();
199    }
200
201    /**
202     * Get total number of inferred columns across all tables.
203     *
204     * @return inferred column count
205     */
206    public int getInferredColumnCount() {
207        int count = 0;
208        for (Set<String> columns : inferredColumnsByTable.values()) {
209            count += columns.size();
210        }
211        return count;
212    }
213
214    /**
215     * Clear all evidence and inferred columns.
216     */
217    public void clear() {
218        evidenceList.clear();
219        inferredColumnsByTable.clear();
220        evidenceByColumn.clear();
221    }
222
223    /**
224     * Get statistics about the inference engine state.
225     *
226     * @return summary string
227     */
228    public String getStatistics() {
229        return String.format(
230            "InferenceEngine[tables=%d, columns=%d, evidence=%d]",
231            inferredColumnsByTable.size(),
232            getInferredColumnCount(),
233            evidenceList.size()
234        );
235    }
236
237    /**
238     * Make a key for indexing table.column evidence.
239     */
240    private String makeKey(String tableName, String columnName) {
241        return tableName + "." + columnName;
242    }
243
244    @Override
245    public String toString() {
246        return getStatistics();
247    }
248}