Source code

001package gudusoft.gsqlparser.resolver2.namespace;
002
003import gudusoft.gsqlparser.nodes.TResultColumn;
004import gudusoft.gsqlparser.nodes.TResultColumnList;
005import gudusoft.gsqlparser.nodes.TTable;
006import gudusoft.gsqlparser.resolver2.ColumnLevel;
007import gudusoft.gsqlparser.resolver2.matcher.INameMatcher;
008import gudusoft.gsqlparser.resolver2.model.ColumnSource;
009import gudusoft.gsqlparser.stmt.TSelectSqlStatement;
010
011import java.util.*;
012
013/**
014 * Namespace representing a UNION/INTERSECT/EXCEPT query.
015 *
016 * Key characteristics:
017 * - Schema is defined by the FIRST branch (SQL standard)
018 * - Columns must be pushed down to ALL branches
019 * - hasStarColumn() returns true if ANY branch has SELECT *
 * - addInferredColumn() propagates only to branches that support dynamic inference (branches with SELECT *)
021 *
022 * Example:
023 * FROM (
024 *     SELECT * FROM table_1
025 *     UNION ALL
026 *     SELECT * FROM table_2
027 *     UNION ALL
028 *     SELECT * FROM table_3
029 * ) Combined
030 *
031 * When outer query references "col_1", it should be pushed to ALL branches.
032 */
033public class UnionNamespace extends AbstractNamespace {
034
035    private final TSelectSqlStatement unionQuery;
036    private final String alias;
037
038    /** All branches of the UNION (flattened) */
039    private final List<TSelectSqlStatement> allBranches;
040
041    /** Namespace for each branch */
042    private final List<SubqueryNamespace> branchNamespaces;
043
044    /** Inferred columns from star push-down */
045    private Map<String, ColumnSource> inferredColumns;
046
047    /** Track inferred column names */
048    private Set<String> inferredColumnNames;
049
050    public UnionNamespace(TSelectSqlStatement unionQuery,
051                          String alias,
052                          INameMatcher nameMatcher) {
053        super(unionQuery, nameMatcher);
054        this.unionQuery = unionQuery;
055        this.alias = alias;
056
057        // Flatten all UNION branches
058        this.allBranches = new ArrayList<>();
059        flattenUnionBranches(unionQuery, allBranches);
060
061        // Create namespace for each branch
062        this.branchNamespaces = new ArrayList<>();
063        for (int i = 0; i < allBranches.size(); i++) {
064            TSelectSqlStatement branch = allBranches.get(i);
065            SubqueryNamespace branchNs = new SubqueryNamespace(branch, "branch_" + i, nameMatcher);
066            branchNamespaces.add(branchNs);
067        }
068    }
069
070    /**
071     * Iteratively flatten UNION branches into a list.
072     * Handles nested UNION structures like: (A UNION B) UNION C
073     * Uses explicit stack to avoid StackOverflow on deeply nested chains.
074     */
075    private void flattenUnionBranches(TSelectSqlStatement stmt, List<TSelectSqlStatement> branches) {
076        if (stmt == null) {
077            return;
078        }
079
080        Deque<TSelectSqlStatement> stack = new ArrayDeque<>();
081        stack.push(stmt);
082
083        while (!stack.isEmpty()) {
084            TSelectSqlStatement current = stack.pop();
085            if (current == null) {
086                continue;
087            }
088            if (current.isCombinedQuery()) {
089                // Push right first so left is processed first (LIFO)
090                stack.push(current.getRightStmt());
091                stack.push(current.getLeftStmt());
092            } else {
093                branches.add(current);
094            }
095        }
096    }
097
098    @Override
099    public String getDisplayName() {
100        return alias != null ? alias : "<union>";
101    }
102
103    @Override
104    public TTable getFinalTable() {
105        // For UNION, return the first branch's final table
106        // This is used for single-table resolution
107        if (!branchNamespaces.isEmpty()) {
108            return branchNamespaces.get(0).getFinalTable();
109        }
110        return null;
111    }
112
113    @Override
114    public List<TTable> getAllFinalTables() {
115        // Return tables from ALL branches
116        List<TTable> allTables = new ArrayList<>();
117        for (SubqueryNamespace branchNs : branchNamespaces) {
118            branchNs.validate();
119            List<TTable> branchTables = branchNs.getAllFinalTables();
120            allTables.addAll(branchTables);
121        }
122        return allTables;
123    }
124
125    @Override
126    protected void doValidate() {
127        // Extract columns from first branch's SELECT list (SQL standard)
128        columnSources = new LinkedHashMap<>();
129
130        if (allBranches.isEmpty()) {
131            return;
132        }
133
134        // Validate all branch namespaces
135        for (SubqueryNamespace branchNs : branchNamespaces) {
136            branchNs.validate();
137        }
138
139        // Get columns from first branch (defines schema)
140        TSelectSqlStatement firstBranch = allBranches.get(0);
141        TResultColumnList selectList = firstBranch.getResultColumnList();
142        if (selectList == null) {
143            return;
144        }
145
146        for (int i = 0; i < selectList.size(); i++) {
147            TResultColumn resultCol = selectList.getResultColumn(i);
148            String colName = getColumnName(resultCol);
149            if (colName == null) {
150                colName = "col_" + (i + 1);
151            }
152
153            // For each column position, collect tables from ALL branches.
154            // UNION/MINUS columns are matched by POSITION, not by name.
155            // For each branch, check if the result column at that position is a simple column reference.
156            // If so, include all tables from that branch as potential sources.
157            List<TTable> columnTables = new ArrayList<>();
158
159            for (int branchIdx = 0; branchIdx < allBranches.size(); branchIdx++) {
160                TSelectSqlStatement branch = allBranches.get(branchIdx);
161                TResultColumnList branchSelectList = branch.getResultColumnList();
162                if (branchSelectList == null || i >= branchSelectList.size()) {
163                    continue;
164                }
165
166                TResultColumn branchResultCol = branchSelectList.getResultColumn(i);
167                if (branchResultCol == null || branchResultCol.getExpr() == null) {
168                    continue;
169                }
170
171                // Check if this result column is a simple column reference (not NULL or expression)
172                gudusoft.gsqlparser.nodes.TExpression expr = branchResultCol.getExpr();
173                if (expr.getExpressionType() == gudusoft.gsqlparser.EExpressionType.simple_object_name_t) {
174                    gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
175                    if (objName != null) {
176                        // Get the column name at this position in the branch
177                        String branchColName = objName.getColumnNameOnly();
178
179                        // For UNION data lineage, only include tables where the column name
180                        // in this branch matches the column name from the first branch.
181                        // This prevents incorrect associations like CDS_APP.bankcode when
182                        // CDS_APP branch actually has c_mandant at that position.
183                        boolean columnNameMatches = nameMatcher.matches(branchColName, colName);
184
185                        // First try to get the source table from the column reference itself
186                        TTable sourceTable = objName.getSourceTable();
187                        if (sourceTable != null && !columnTables.contains(sourceTable) && columnNameMatches) {
188                            // Phase 1 resolved this column - add if column name matches
189                            columnTables.add(sourceTable);
190                        } else if (sourceTable == null && columnNameMatches) {
191                            // If sourceTable is not set (Phase 1 resolution hasn't happened),
192                            // check if there's a qualified reference (e.g., t.col)
193                            String tableQualifier = objName.getTableString();
194                            if (tableQualifier != null && !tableQualifier.isEmpty() && branch.tables != null) {
195                                // Qualified column - try to find the table by alias or name
196                                for (int ti = 0; ti < branch.tables.size(); ti++) {
197                                    TTable t = branch.tables.getTable(ti);
198                                    if (t != null) {
199                                        String alias = t.getAliasName();
200                                        String name = t.getTableName() != null ? t.getTableName().toString() : null;
201                                        if ((alias != null && alias.equalsIgnoreCase(tableQualifier)) ||
202                                            (name != null && name.equalsIgnoreCase(tableQualifier))) {
203                                            if (!columnTables.contains(t)) {
204                                                columnTables.add(t);
205                                            }
206                                            break;
207                                        }
208                                    }
209                                }
210                            } else if (branch.tables != null) {
211                                // Unqualified column reference - add only the first non-subquery/join table
212                                for (int ti = 0; ti < branch.tables.size(); ti++) {
213                                    TTable t = branch.tables.getTable(ti);
214                                    if (t != null && !columnTables.contains(t)) {
215                                        // Skip subqueries and joins - they're not final tables
216                                        if (t.getTableType() != gudusoft.gsqlparser.ETableSource.subquery &&
217                                            t.getTableType() != gudusoft.gsqlparser.ETableSource.join) {
218                                            columnTables.add(t);
219                                            break;  // Only add the first table for unqualified columns
220                                        }
221                                    }
222                                }
223                            }
224                        }
225                    }
226                }
227                // For non-column expressions (NULL, functions, etc.), don't add any tables
228            }
229
230            // Create column source with candidateTables from branches that have the column
231            // Always pass the list (even if empty) so getAllFinalTables() knows we explicitly
232            // determined the candidate tables rather than needing to delegate to namespace
233            ColumnSource source = new ColumnSource(
234                this,
235                colName,
236                resultCol,
237                1.0,
238                "union_column",
239                null,  // overrideTable
240                columnTables  // Pass empty list when no tables match
241            );
242
243            columnSources.put(colName, source);
244        }
245    }
246
247    /**
248     * Extract column name from TResultColumn.
249     */
250    private String getColumnName(TResultColumn resultCol) {
251        // Check for alias
252        if (resultCol.getAliasClause() != null &&
253            resultCol.getAliasClause().getAliasName() != null) {
254            return resultCol.getAliasClause().getAliasName().toString();
255        }
256
257        // Check for simple column reference
258        if (resultCol.getExpr() != null) {
259            gudusoft.gsqlparser.nodes.TExpression expr = resultCol.getExpr();
260            if (expr.getExpressionType() == gudusoft.gsqlparser.EExpressionType.simple_object_name_t) {
261                gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
262                if (objName != null) {
263                    return objName.getColumnNameOnly();
264                }
265            }
266        }
267
268        return null;
269    }
270
271    @Override
272    public ColumnLevel hasColumn(String columnName) {
273        ensureValidated();
274
275        // Check in explicit columns from first branch
276        for (String existingCol : columnSources.keySet()) {
277            if (nameMatcher.matches(existingCol, columnName)) {
278                return ColumnLevel.EXISTS;
279            }
280        }
281
282        // Check in inferred columns
283        if (inferredColumns != null && inferredColumns.containsKey(columnName)) {
284            return ColumnLevel.EXISTS;
285        }
286
287        // If any branch has SELECT *, unknown columns MAYBE exist
288        if (hasStarColumn()) {
289            return ColumnLevel.MAYBE;
290        }
291
292        return ColumnLevel.NOT_EXISTS;
293    }
294
295    @Override
296    public ColumnSource resolveColumn(String columnName) {
297        ensureValidated();
298
299        // First check explicit columns from first branch
300        ColumnSource source = super.resolveColumn(columnName);
301        if (source != null) {
302            return source;
303        }
304
305        // Then check inferred columns
306        if (inferredColumns != null) {
307            for (Map.Entry<String, ColumnSource> entry : inferredColumns.entrySet()) {
308                if (nameMatcher.matches(entry.getKey(), columnName)) {
309                    return entry.getValue();
310                }
311            }
312        }
313
314        // If has star column, auto-infer this column
315        if (hasStarColumn()) {
316            boolean added = addInferredColumn(columnName, 0.8, "auto_inferred_from_outer_reference");
317            if (added && inferredColumns != null) {
318                ColumnSource inferredSource = inferredColumns.get(columnName);
319                if (inferredSource != null) {
320                    return inferredSource;
321                }
322            }
323        }
324
325        return null;
326    }
327
328    @Override
329    public TSelectSqlStatement getSelectStatement() {
330        return unionQuery;
331    }
332
333    @Override
334    public boolean hasStarColumn() {
335        // Returns true if ANY branch has SELECT *
336        for (SubqueryNamespace branchNs : branchNamespaces) {
337            if (branchNs.hasStarColumn()) {
338                return true;
339            }
340        }
341        return false;
342    }
343
344    @Override
345    public boolean supportsDynamicInference() {
346        return hasStarColumn();
347    }
348
349    @Override
350    public boolean addInferredColumn(String columnName, double confidence, String evidence) {
351        if (columnName == null || columnName.isEmpty()) {
352            return false;
353        }
354
355        // Initialize maps if needed
356        if (inferredColumns == null) {
357            inferredColumns = new LinkedHashMap<>();
358        }
359        if (inferredColumnNames == null) {
360            inferredColumnNames = new HashSet<>();
361        }
362
363        // Check if already exists in explicit columns
364        if (columnSources != null && columnSources.containsKey(columnName)) {
365            return false;
366        }
367
368        // Check if already inferred
369        if (inferredColumns.containsKey(columnName)) {
370            return false;
371        }
372
373        // Collect final tables from ALL branches that support dynamic inference (have SELECT *)
374        // For data lineage, we need to track that columns could come from any UNION branch
375        // The formatter will output all candidates for UNION columns when isCandidatesFromUnion is true
376        java.util.List<TTable> candidateTables = new java.util.ArrayList<>();
377        for (SubqueryNamespace branchNs : branchNamespaces) {
378            // Only collect from branches that could have inferred columns
379            if (!branchNs.supportsDynamicInference()) {
380                continue;
381            }
382            branchNs.validate();
383            java.util.List<TTable> branchTables = branchNs.getAllFinalTables();
384            for (TTable table : branchTables) {
385                if (table != null && !candidateTables.contains(table)) {
386                    candidateTables.add(table);
387                }
388            }
389        }
390
391        // Create inferred column source for this union namespace WITH candidate tables
392        ColumnSource source = new ColumnSource(
393            this,
394            columnName,
395            null,
396            confidence,
397            evidence,
398            null,  // overrideTable is null for UNION columns
399            candidateTables.isEmpty() ? null : candidateTables
400        );
401
402        inferredColumns.put(columnName, source);
403        inferredColumnNames.add(columnName);
404
405        if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
406            System.out.println("[UnionNamespace] Added '" + columnName + "' to " + alias +
407                ", propagating to " + branchNamespaces.size() + " branches");
408        }
409
410        // CRITICAL: Propagate to branches that support dynamic inference (have SELECT *)
411        // Only propagate to branches with star columns - branches with explicit column lists
412        // either have the column explicitly or don't have it at all.
413        for (SubqueryNamespace branchNs : branchNamespaces) {
414            // Only propagate to branches that can accept inferred columns
415            if (!branchNs.supportsDynamicInference()) {
416                if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
417                    System.out.println("[UnionNamespace] Skipping branch " + branchNs.getDisplayName() +
418                        " (no star column)");
419                }
420                continue;
421            }
422            if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
423                System.out.println("[UnionNamespace] Propagating '" + columnName + "' to branch " + branchNs.getDisplayName());
424            }
425            branchNs.addInferredColumn(columnName, confidence, evidence + "_union_propagate");
426        }
427
428        return true;
429    }
430
431    @Override
432    public Set<String> getInferredColumns() {
433        if (inferredColumnNames == null) {
434            return Collections.emptySet();
435        }
436        return Collections.unmodifiableSet(inferredColumnNames);
437    }
438
439    /**
440     * Get all branch namespaces.
441     * Useful for external code that needs to iterate over branches.
442     */
443    public List<SubqueryNamespace> getBranchNamespaces() {
444        return Collections.unmodifiableList(branchNamespaces);
445    }
446
447    /**
448     * Get all branch SELECT statements.
449     */
450    public List<TSelectSqlStatement> getAllBranches() {
451        return Collections.unmodifiableList(allBranches);
452    }
453
454    /**
455     * Get the number of UNION branches.
456     */
457    public int getBranchCount() {
458        return allBranches.size();
459    }
460
461    @Override
462    public String toString() {
463        int totalColumns = (columnSources != null ? columnSources.size() : 0) +
464                          (inferredColumns != null ? inferredColumns.size() : 0);
465        return "UnionNamespace(" + getDisplayName() +
466               ", branches=" + allBranches.size() +
467               ", columns=" + totalColumns +
468               ", inferred=" + (inferredColumns != null ? inferredColumns.size() : 0) + ")";
469    }
470}