001package gudusoft.gsqlparser.resolver2.namespace;
002
003import gudusoft.gsqlparser.nodes.TResultColumn;
004import gudusoft.gsqlparser.nodes.TResultColumnList;
005import gudusoft.gsqlparser.nodes.TTable;
006import gudusoft.gsqlparser.resolver2.ColumnLevel;
007import gudusoft.gsqlparser.resolver2.matcher.INameMatcher;
008import gudusoft.gsqlparser.resolver2.model.ColumnSource;
009import gudusoft.gsqlparser.stmt.TSelectSqlStatement;
010
011import java.util.*;
012
013/**
014 * Namespace representing a UNION/INTERSECT/EXCEPT query.
015 *
016 * Key characteristics:
017 * - Schema is defined by the FIRST branch (SQL standard)
018 * - Columns must be pushed down to ALL branches
019 * - hasStarColumn() returns true if ANY branch has SELECT *
 * - addInferredColumn() propagates to every branch that has SELECT * (branches with explicit select lists are skipped)
021 *
022 * Example:
023 * FROM (
024 *     SELECT * FROM table_1
025 *     UNION ALL
026 *     SELECT * FROM table_2
027 *     UNION ALL
028 *     SELECT * FROM table_3
029 * ) Combined
030 *
031 * When outer query references "col_1", it should be pushed to ALL branches.
032 */
033public class UnionNamespace extends AbstractNamespace {
034
/** Statement handed to the constructor; returned unchanged by {@link #getSelectStatement()}. */
private final TSelectSqlStatement unionQuery;

/** Alias supplied by the caller; may be null, in which case the display name falls back to a placeholder. */
private final String alias;

/** Leaf (non-combined) SELECT statements, in the order produced by flattening {@link #unionQuery}. */
private final List<TSelectSqlStatement> allBranches;

/** One namespace per entry of {@link #allBranches}, kept in the same order. */
private final List<SubqueryNamespace> branchNamespaces;

/** Column sources registered through addInferredColumn(); stays null until the first registration attempt. */
private Map<String, ColumnSource> inferredColumns;

/** Names registered through addInferredColumn(); stays null until the first registration attempt. */
private Set<String> inferredColumnNames;

/**
 * Builds the namespace for a set-operation query: flattens the statement into
 * its leaf branches and wraps each leaf in its own {@link SubqueryNamespace}.
 *
 * @param unionQuery  the statement to flatten
 * @param alias       name reported by getDisplayName(); may be null
 * @param nameMatcher matcher passed to the superclass and to every branch namespace
 */
public UnionNamespace(TSelectSqlStatement unionQuery,
                      String alias,
                      INameMatcher nameMatcher) {
    super(unionQuery, nameMatcher);
    this.unionQuery = unionQuery;
    this.alias = alias;

    // Collect the leaf SELECTs first ...
    this.allBranches = new ArrayList<>();
    flattenUnionBranches(unionQuery, this.allBranches);

    // ... then give each leaf a namespace labelled "branch_<position>".
    this.branchNamespaces = new ArrayList<>();
    int ordinal = 0;
    for (TSelectSqlStatement leaf : this.allBranches) {
        this.branchNamespaces.add(new SubqueryNamespace(leaf, "branch_" + ordinal, nameMatcher));
        ordinal++;
    }
}
069
070    /**
071     * Recursively flatten UNION branches into a list.
072     * Handles nested UNION structures like: (A UNION B) UNION C
073     */
074    private void flattenUnionBranches(TSelectSqlStatement stmt, List<TSelectSqlStatement> branches) {
075        if (stmt == null) {
076            return;
077        }
078
079        if (stmt.isCombinedQuery()) {
080            // Recursively flatten left side
081            flattenUnionBranches(stmt.getLeftStmt(), branches);
082            // Recursively flatten right side
083            flattenUnionBranches(stmt.getRightStmt(), branches);
084        } else {
085            // This is a leaf branch - add it
086            branches.add(stmt);
087        }
088    }
089
090    @Override
091    public String getDisplayName() {
092        return alias != null ? alias : "<union>";
093    }
094
095    @Override
096    public TTable getFinalTable() {
097        // For UNION, return the first branch's final table
098        // This is used for single-table resolution
099        if (!branchNamespaces.isEmpty()) {
100            return branchNamespaces.get(0).getFinalTable();
101        }
102        return null;
103    }
104
105    @Override
106    public List<TTable> getAllFinalTables() {
107        // Return tables from ALL branches
108        List<TTable> allTables = new ArrayList<>();
109        for (SubqueryNamespace branchNs : branchNamespaces) {
110            branchNs.validate();
111            List<TTable> branchTables = branchNs.getAllFinalTables();
112            allTables.addAll(branchTables);
113        }
114        return allTables;
115    }
116
117    @Override
118    protected void doValidate() {
119        // Extract columns from first branch's SELECT list (SQL standard)
120        columnSources = new LinkedHashMap<>();
121
122        if (allBranches.isEmpty()) {
123            return;
124        }
125
126        // Validate all branch namespaces
127        for (SubqueryNamespace branchNs : branchNamespaces) {
128            branchNs.validate();
129        }
130
131        // Get columns from first branch (defines schema)
132        TSelectSqlStatement firstBranch = allBranches.get(0);
133        TResultColumnList selectList = firstBranch.getResultColumnList();
134        if (selectList == null) {
135            return;
136        }
137
138        for (int i = 0; i < selectList.size(); i++) {
139            TResultColumn resultCol = selectList.getResultColumn(i);
140            String colName = getColumnName(resultCol);
141            if (colName == null) {
142                colName = "col_" + (i + 1);
143            }
144
145            // For each column position, collect tables from ALL branches.
146            // UNION/MINUS columns are matched by POSITION, not by name.
147            // For each branch, check if the result column at that position is a simple column reference.
148            // If so, include all tables from that branch as potential sources.
149            List<TTable> columnTables = new ArrayList<>();
150
151            for (int branchIdx = 0; branchIdx < allBranches.size(); branchIdx++) {
152                TSelectSqlStatement branch = allBranches.get(branchIdx);
153                TResultColumnList branchSelectList = branch.getResultColumnList();
154                if (branchSelectList == null || i >= branchSelectList.size()) {
155                    continue;
156                }
157
158                TResultColumn branchResultCol = branchSelectList.getResultColumn(i);
159                if (branchResultCol == null || branchResultCol.getExpr() == null) {
160                    continue;
161                }
162
163                // Check if this result column is a simple column reference (not NULL or expression)
164                gudusoft.gsqlparser.nodes.TExpression expr = branchResultCol.getExpr();
165                if (expr.getExpressionType() == gudusoft.gsqlparser.EExpressionType.simple_object_name_t) {
166                    gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
167                    if (objName != null) {
168                        // Get the column name at this position in the branch
169                        String branchColName = objName.getColumnNameOnly();
170
171                        // For UNION data lineage, only include tables where the column name
172                        // in this branch matches the column name from the first branch.
173                        // This prevents incorrect associations like CDS_APP.bankcode when
174                        // CDS_APP branch actually has c_mandant at that position.
175                        boolean columnNameMatches = nameMatcher.matches(branchColName, colName);
176
177                        // First try to get the source table from the column reference itself
178                        TTable sourceTable = objName.getSourceTable();
179                        if (sourceTable != null && !columnTables.contains(sourceTable) && columnNameMatches) {
180                            // Phase 1 resolved this column - add if column name matches
181                            columnTables.add(sourceTable);
182                        } else if (sourceTable == null && columnNameMatches) {
183                            // If sourceTable is not set (Phase 1 resolution hasn't happened),
184                            // check if there's a qualified reference (e.g., t.col)
185                            String tableQualifier = objName.getTableString();
186                            if (tableQualifier != null && !tableQualifier.isEmpty() && branch.tables != null) {
187                                // Qualified column - try to find the table by alias or name
188                                for (int ti = 0; ti < branch.tables.size(); ti++) {
189                                    TTable t = branch.tables.getTable(ti);
190                                    if (t != null) {
191                                        String alias = t.getAliasName();
192                                        String name = t.getTableName() != null ? t.getTableName().toString() : null;
193                                        if ((alias != null && alias.equalsIgnoreCase(tableQualifier)) ||
194                                            (name != null && name.equalsIgnoreCase(tableQualifier))) {
195                                            if (!columnTables.contains(t)) {
196                                                columnTables.add(t);
197                                            }
198                                            break;
199                                        }
200                                    }
201                                }
202                            } else if (branch.tables != null) {
203                                // Unqualified column reference - add only the first non-subquery/join table
204                                for (int ti = 0; ti < branch.tables.size(); ti++) {
205                                    TTable t = branch.tables.getTable(ti);
206                                    if (t != null && !columnTables.contains(t)) {
207                                        // Skip subqueries and joins - they're not final tables
208                                        if (t.getTableType() != gudusoft.gsqlparser.ETableSource.subquery &&
209                                            t.getTableType() != gudusoft.gsqlparser.ETableSource.join) {
210                                            columnTables.add(t);
211                                            break;  // Only add the first table for unqualified columns
212                                        }
213                                    }
214                                }
215                            }
216                        }
217                    }
218                }
219                // For non-column expressions (NULL, functions, etc.), don't add any tables
220            }
221
222            // Create column source with candidateTables from branches that have the column
223            // Always pass the list (even if empty) so getAllFinalTables() knows we explicitly
224            // determined the candidate tables rather than needing to delegate to namespace
225            ColumnSource source = new ColumnSource(
226                this,
227                colName,
228                resultCol,
229                1.0,
230                "union_column",
231                null,  // overrideTable
232                columnTables  // Pass empty list when no tables match
233            );
234
235            columnSources.put(colName, source);
236        }
237    }
238
239    /**
240     * Extract column name from TResultColumn.
241     */
242    private String getColumnName(TResultColumn resultCol) {
243        // Check for alias
244        if (resultCol.getAliasClause() != null &&
245            resultCol.getAliasClause().getAliasName() != null) {
246            return resultCol.getAliasClause().getAliasName().toString();
247        }
248
249        // Check for simple column reference
250        if (resultCol.getExpr() != null) {
251            gudusoft.gsqlparser.nodes.TExpression expr = resultCol.getExpr();
252            if (expr.getExpressionType() == gudusoft.gsqlparser.EExpressionType.simple_object_name_t) {
253                gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
254                if (objName != null) {
255                    return objName.getColumnNameOnly();
256                }
257            }
258        }
259
260        return null;
261    }
262
263    @Override
264    public ColumnLevel hasColumn(String columnName) {
265        ensureValidated();
266
267        // Check in explicit columns from first branch
268        for (String existingCol : columnSources.keySet()) {
269            if (nameMatcher.matches(existingCol, columnName)) {
270                return ColumnLevel.EXISTS;
271            }
272        }
273
274        // Check in inferred columns
275        if (inferredColumns != null && inferredColumns.containsKey(columnName)) {
276            return ColumnLevel.EXISTS;
277        }
278
279        // If any branch has SELECT *, unknown columns MAYBE exist
280        if (hasStarColumn()) {
281            return ColumnLevel.MAYBE;
282        }
283
284        return ColumnLevel.NOT_EXISTS;
285    }
286
287    @Override
288    public ColumnSource resolveColumn(String columnName) {
289        ensureValidated();
290
291        // First check explicit columns from first branch
292        ColumnSource source = super.resolveColumn(columnName);
293        if (source != null) {
294            return source;
295        }
296
297        // Then check inferred columns
298        if (inferredColumns != null) {
299            for (Map.Entry<String, ColumnSource> entry : inferredColumns.entrySet()) {
300                if (nameMatcher.matches(entry.getKey(), columnName)) {
301                    return entry.getValue();
302                }
303            }
304        }
305
306        // If has star column, auto-infer this column
307        if (hasStarColumn()) {
308            boolean added = addInferredColumn(columnName, 0.8, "auto_inferred_from_outer_reference");
309            if (added && inferredColumns != null) {
310                ColumnSource inferredSource = inferredColumns.get(columnName);
311                if (inferredSource != null) {
312                    return inferredSource;
313                }
314            }
315        }
316
317        return null;
318    }
319
320    @Override
321    public TSelectSqlStatement getSelectStatement() {
322        return unionQuery;
323    }
324
325    @Override
326    public boolean hasStarColumn() {
327        // Returns true if ANY branch has SELECT *
328        for (SubqueryNamespace branchNs : branchNamespaces) {
329            if (branchNs.hasStarColumn()) {
330                return true;
331            }
332        }
333        return false;
334    }
335
336    @Override
337    public boolean supportsDynamicInference() {
338        return hasStarColumn();
339    }
340
341    @Override
342    public boolean addInferredColumn(String columnName, double confidence, String evidence) {
343        if (columnName == null || columnName.isEmpty()) {
344            return false;
345        }
346
347        // Initialize maps if needed
348        if (inferredColumns == null) {
349            inferredColumns = new LinkedHashMap<>();
350        }
351        if (inferredColumnNames == null) {
352            inferredColumnNames = new HashSet<>();
353        }
354
355        // Check if already exists in explicit columns
356        if (columnSources != null && columnSources.containsKey(columnName)) {
357            return false;
358        }
359
360        // Check if already inferred
361        if (inferredColumns.containsKey(columnName)) {
362            return false;
363        }
364
365        // Collect final tables from ALL branches that support dynamic inference (have SELECT *)
366        // For data lineage, we need to track that columns could come from any UNION branch
367        // The formatter will output all candidates for UNION columns when isCandidatesFromUnion is true
368        java.util.List<TTable> candidateTables = new java.util.ArrayList<>();
369        for (SubqueryNamespace branchNs : branchNamespaces) {
370            // Only collect from branches that could have inferred columns
371            if (!branchNs.supportsDynamicInference()) {
372                continue;
373            }
374            branchNs.validate();
375            java.util.List<TTable> branchTables = branchNs.getAllFinalTables();
376            for (TTable table : branchTables) {
377                if (table != null && !candidateTables.contains(table)) {
378                    candidateTables.add(table);
379                }
380            }
381        }
382
383        // Create inferred column source for this union namespace WITH candidate tables
384        ColumnSource source = new ColumnSource(
385            this,
386            columnName,
387            null,
388            confidence,
389            evidence,
390            null,  // overrideTable is null for UNION columns
391            candidateTables.isEmpty() ? null : candidateTables
392        );
393
394        inferredColumns.put(columnName, source);
395        inferredColumnNames.add(columnName);
396
397        if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
398            System.out.println("[UnionNamespace] Added '" + columnName + "' to " + alias +
399                ", propagating to " + branchNamespaces.size() + " branches");
400        }
401
402        // CRITICAL: Propagate to branches that support dynamic inference (have SELECT *)
403        // Only propagate to branches with star columns - branches with explicit column lists
404        // either have the column explicitly or don't have it at all.
405        for (SubqueryNamespace branchNs : branchNamespaces) {
406            // Only propagate to branches that can accept inferred columns
407            if (!branchNs.supportsDynamicInference()) {
408                if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
409                    System.out.println("[UnionNamespace] Skipping branch " + branchNs.getDisplayName() +
410                        " (no star column)");
411                }
412                continue;
413            }
414            if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
415                System.out.println("[UnionNamespace] Propagating '" + columnName + "' to branch " + branchNs.getDisplayName());
416            }
417            branchNs.addInferredColumn(columnName, confidence, evidence + "_union_propagate");
418        }
419
420        return true;
421    }
422
423    @Override
424    public Set<String> getInferredColumns() {
425        if (inferredColumnNames == null) {
426            return Collections.emptySet();
427        }
428        return Collections.unmodifiableSet(inferredColumnNames);
429    }
430
431    /**
432     * Get all branch namespaces.
433     * Useful for external code that needs to iterate over branches.
434     */
435    public List<SubqueryNamespace> getBranchNamespaces() {
436        return Collections.unmodifiableList(branchNamespaces);
437    }
438
439    /**
440     * Get all branch SELECT statements.
441     */
442    public List<TSelectSqlStatement> getAllBranches() {
443        return Collections.unmodifiableList(allBranches);
444    }
445
446    /**
447     * Get the number of UNION branches.
448     */
449    public int getBranchCount() {
450        return allBranches.size();
451    }
452
453    @Override
454    public String toString() {
455        int totalColumns = (columnSources != null ? columnSources.size() : 0) +
456                          (inferredColumns != null ? inferredColumns.size() : 0);
457        return "UnionNamespace(" + getDisplayName() +
458               ", branches=" + allBranches.size() +
459               ", columns=" + totalColumns +
460               ", inferred=" + (inferredColumns != null ? inferredColumns.size() : 0) + ")";
461    }
462}