001package gudusoft.gsqlparser.resolver2.namespace;
002
003import gudusoft.gsqlparser.nodes.TResultColumn;
004import gudusoft.gsqlparser.nodes.TResultColumnList;
005import gudusoft.gsqlparser.nodes.TTable;
006import gudusoft.gsqlparser.resolver2.ColumnLevel;
007import gudusoft.gsqlparser.resolver2.matcher.INameMatcher;
008import gudusoft.gsqlparser.resolver2.matcher.VendorNameMatcher;
009import gudusoft.gsqlparser.resolver2.model.ColumnSource;
010import gudusoft.gsqlparser.sqlenv.ESQLDataObjectType;
011import gudusoft.gsqlparser.stmt.TSelectSqlStatement;
012
013import java.util.*;
014
015/**
016 * Namespace representing a UNION/INTERSECT/EXCEPT query.
017 *
018 * Key characteristics:
019 * - Schema is defined by the FIRST branch (SQL standard)
020 * - Columns must be pushed down to ALL branches
021 * - hasStarColumn() returns true if ANY branch has SELECT *
 * - addInferredColumn() propagates to every branch that supports dynamic inference (has SELECT *)
023 *
024 * Example:
025 * FROM (
026 *     SELECT * FROM table_1
027 *     UNION ALL
028 *     SELECT * FROM table_2
029 *     UNION ALL
030 *     SELECT * FROM table_3
031 * ) Combined
032 *
033 * When outer query references "col_1", it should be pushed to ALL branches.
034 */
035public class UnionNamespace extends AbstractNamespace {
036
037    private final TSelectSqlStatement unionQuery;
038    private final String alias;
039
040    /** All branches of the UNION (flattened) */
041    private final List<TSelectSqlStatement> allBranches;
042
043    /** Namespace for each branch */
044    private final List<SubqueryNamespace> branchNamespaces;
045
046    /** Inferred columns from star push-down */
047    private Map<String, ColumnSource> inferredColumns;
048
049    /** Track inferred column names */
050    private Set<String> inferredColumnNames;
051
052    public UnionNamespace(TSelectSqlStatement unionQuery,
053                          String alias,
054                          INameMatcher nameMatcher) {
055        super(unionQuery, nameMatcher);
056        this.unionQuery = unionQuery;
057        this.alias = alias;
058
059        // Flatten all UNION branches
060        this.allBranches = new ArrayList<>();
061        flattenUnionBranches(unionQuery, allBranches);
062
063        // Create namespace for each branch
064        this.branchNamespaces = new ArrayList<>();
065        for (int i = 0; i < allBranches.size(); i++) {
066            TSelectSqlStatement branch = allBranches.get(i);
067            SubqueryNamespace branchNs = new SubqueryNamespace(branch, "branch_" + i, nameMatcher);
068            branchNamespaces.add(branchNs);
069        }
070    }
071
072    /**
073     * Iteratively flatten UNION branches into a list.
074     * Handles nested UNION structures like: (A UNION B) UNION C
075     * Uses explicit stack to avoid StackOverflow on deeply nested chains.
076     */
077    private void flattenUnionBranches(TSelectSqlStatement stmt, List<TSelectSqlStatement> branches) {
078        if (stmt == null) {
079            return;
080        }
081
082        Deque<TSelectSqlStatement> stack = new ArrayDeque<>();
083        stack.push(stmt);
084
085        while (!stack.isEmpty()) {
086            TSelectSqlStatement current = stack.pop();
087            if (current == null) {
088                continue;
089            }
090            if (current.isCombinedQuery()) {
091                // Push right first so left is processed first (LIFO)
092                stack.push(current.getRightStmt());
093                stack.push(current.getLeftStmt());
094            } else {
095                branches.add(current);
096            }
097        }
098    }
099
100    @Override
101    public String getDisplayName() {
102        return alias != null ? alias : "<union>";
103    }
104
105    @Override
106    public TTable getFinalTable() {
107        // For UNION, return the first branch's final table
108        // This is used for single-table resolution
109        if (!branchNamespaces.isEmpty()) {
110            return branchNamespaces.get(0).getFinalTable();
111        }
112        return null;
113    }
114
115    @Override
116    public List<TTable> getAllFinalTables() {
117        // Return tables from ALL branches
118        List<TTable> allTables = new ArrayList<>();
119        for (SubqueryNamespace branchNs : branchNamespaces) {
120            branchNs.validate();
121            List<TTable> branchTables = branchNs.getAllFinalTables();
122            allTables.addAll(branchTables);
123        }
124        return allTables;
125    }
126
127    @Override
128    protected void doValidate() {
129        // Extract columns from first branch's SELECT list (SQL standard)
130        columnSources = new LinkedHashMap<>();
131
132        if (allBranches.isEmpty()) {
133            return;
134        }
135
136        // Validate all branch namespaces
137        for (SubqueryNamespace branchNs : branchNamespaces) {
138            branchNs.validate();
139        }
140
141        // Get columns from first branch (defines schema)
142        TSelectSqlStatement firstBranch = allBranches.get(0);
143        TResultColumnList selectList = firstBranch.getResultColumnList();
144        if (selectList == null) {
145            return;
146        }
147
148        for (int i = 0; i < selectList.size(); i++) {
149            TResultColumn resultCol = selectList.getResultColumn(i);
150            String colName = getColumnName(resultCol);
151            if (colName == null) {
152                colName = "col_" + (i + 1);
153            }
154
155            // For each column position, collect tables from ALL branches.
156            // UNION/MINUS columns are matched by POSITION, not by name.
157            // For each branch, check if the result column at that position is a simple column reference.
158            // If so, include all tables from that branch as potential sources.
159            List<TTable> columnTables = new ArrayList<>();
160
161            for (int branchIdx = 0; branchIdx < allBranches.size(); branchIdx++) {
162                TSelectSqlStatement branch = allBranches.get(branchIdx);
163                TResultColumnList branchSelectList = branch.getResultColumnList();
164                if (branchSelectList == null || i >= branchSelectList.size()) {
165                    continue;
166                }
167
168                TResultColumn branchResultCol = branchSelectList.getResultColumn(i);
169                if (branchResultCol == null || branchResultCol.getExpr() == null) {
170                    continue;
171                }
172
173                // Check if this result column is a simple column reference (not NULL or expression)
174                gudusoft.gsqlparser.nodes.TExpression expr = branchResultCol.getExpr();
175                if (expr.getExpressionType() == gudusoft.gsqlparser.EExpressionType.simple_object_name_t) {
176                    gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
177                    if (objName != null) {
178                        // Get the column name at this position in the branch
179                        String branchColName = objName.getColumnNameOnly();
180
181                        // For UNION data lineage, only include tables where the column name
182                        // in this branch matches the column name from the first branch.
183                        // This prevents incorrect associations like CDS_APP.bankcode when
184                        // CDS_APP branch actually has c_mandant at that position.
185                        boolean columnNameMatches = nameMatcher.matches(branchColName, colName);
186
187                        // First try to get the source table from the column reference itself
188                        TTable sourceTable = objName.getSourceTable();
189                        if (sourceTable != null && !columnTables.contains(sourceTable) && columnNameMatches) {
190                            // Phase 1 resolved this column - add if column name matches
191                            columnTables.add(sourceTable);
192                        } else if (sourceTable == null && columnNameMatches) {
193                            // If sourceTable is not set (Phase 1 resolution hasn't happened),
194                            // check if there's a qualified reference (e.g., t.col)
195                            String tableQualifier = objName.getTableString();
196                            if (tableQualifier != null && !tableQualifier.isEmpty() && branch.tables != null) {
197                                // Qualified column - try to find the table by alias or name.
198                                // Cleanup follow-up to S15: route through the per-dialect
199                                // {@link gudusoft.gsqlparser.resolver2.matcher.INameMatcher},
200                                // and when the matcher is a {@link VendorNameMatcher}, route
201                                // through {@link ESQLDataObjectType#dotTable} explicitly. The
202                                // 2-arg {@code matches(...)} default binds to {@code dotColumn}
203                                // inside {@code VendorNameMatcher}, which produces wrong
204                                // results for BigQuery / MySQL on table-qualifier compares
205                                // (BigQuery: tables SENSITIVE / columns INSENSITIVE; MySQL:
206                                // tables depend on lower_case_table_names, columns always
207                                // INSENSITIVE). Same routing pattern as ListBasedScope and
208                                // CTEScope below.
209                                for (int ti = 0; ti < branch.tables.size(); ti++) {
210                                    TTable t = branch.tables.getTable(ti);
211                                    if (t != null) {
212                                        String alias = t.getAliasName();
213                                        String name = t.getTableName() != null ? t.getTableName().toString() : null;
214                                        if ((alias != null && tableNameMatches(alias, tableQualifier)) ||
215                                            (name != null && tableNameMatches(name, tableQualifier))) {
216                                            if (!columnTables.contains(t)) {
217                                                columnTables.add(t);
218                                            }
219                                            break;
220                                        }
221                                    }
222                                }
223                            } else if (branch.tables != null) {
224                                // Unqualified column reference - add only the first non-subquery/join table
225                                for (int ti = 0; ti < branch.tables.size(); ti++) {
226                                    TTable t = branch.tables.getTable(ti);
227                                    if (t != null && !columnTables.contains(t)) {
228                                        // Skip subqueries and joins - they're not final tables
229                                        if (t.getTableType() != gudusoft.gsqlparser.ETableSource.subquery &&
230                                            t.getTableType() != gudusoft.gsqlparser.ETableSource.join) {
231                                            columnTables.add(t);
232                                            break;  // Only add the first table for unqualified columns
233                                        }
234                                    }
235                                }
236                            }
237                        }
238                    }
239                }
240                // For non-column expressions (NULL, functions, etc.), don't add any tables
241            }
242
243            // Create column source with candidateTables from branches that have the column
244            // Always pass the list (even if empty) so getAllFinalTables() knows we explicitly
245            // determined the candidate tables rather than needing to delegate to namespace
246            ColumnSource source = new ColumnSource(
247                this,
248                colName,
249                resultCol,
250                1.0,
251                "union_column",
252                null,  // overrideTable
253                columnTables  // Pass empty list when no tables match
254            );
255
256            columnSources.put(colName, source);
257        }
258    }
259
260    /**
261     * Extract column name from TResultColumn.
262     */
263    private String getColumnName(TResultColumn resultCol) {
264        // Check for alias
265        if (resultCol.getAliasClause() != null &&
266            resultCol.getAliasClause().getAliasName() != null) {
267            return resultCol.getAliasClause().getAliasName().toString();
268        }
269
270        // Check for simple column reference
271        if (resultCol.getExpr() != null) {
272            gudusoft.gsqlparser.nodes.TExpression expr = resultCol.getExpr();
273            if (expr.getExpressionType() == gudusoft.gsqlparser.EExpressionType.simple_object_name_t) {
274                gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
275                if (objName != null) {
276                    return objName.getColumnNameOnly();
277                }
278            }
279        }
280
281        return null;
282    }
283
284    @Override
285    public ColumnLevel hasColumn(String columnName) {
286        ensureValidated();
287
288        // Check in explicit columns from first branch (raw-keyed map; matcher-aware lookup
289        // routes through INameMatcher / IdentifierService so per-dialect rules apply).
290        if (containsColumnByMatcher(columnSources, columnName)) {
291            return ColumnLevel.EXISTS;
292        }
293
294        // Check in inferred columns. The map is raw-keyed (= ColumnSource.
295        // exposedName) — see addInferredColumn for why round-2 reverted from
296        // normalized keys. The matcher-aware helper handles per-dialect case
297        // rules including SQL Server COLLATION_BASED.
298        if (containsColumnByMatcher(inferredColumns, columnName)) {
299            return ColumnLevel.EXISTS;
300        }
301
302        // If any branch has SELECT *, unknown columns MAYBE exist
303        if (hasStarColumn()) {
304            return ColumnLevel.MAYBE;
305        }
306
307        return ColumnLevel.NOT_EXISTS;
308    }
309
310    /**
311     * Binding-diagnostic view of the set-operation output schema.
312     *
313     * <p>SQL exposes set-operation columns using the first branch's output
314     * names. Branch-local names from later operands are not visible to an
315     * outer query. When any branch contains {@code SELECT *}, the output
316     * shape is not authoritative for missing-output diagnostics.</p>
317     */
318    public ColumnLevel hasAuthoritativeOutputColumn(String columnName) {
319        ensureValidated();
320
321        if (columnName == null || columnName.isEmpty()) {
322            return ColumnLevel.MAYBE;
323        }
324
325        if (containsColumnByMatcher(columnSources, columnName)) {
326            return ColumnLevel.EXISTS;
327        }
328
329        if (containsColumnByMatcher(inferredColumns, columnName)) {
330            return ColumnLevel.EXISTS;
331        }
332
333        if (hasStarColumn()) {
334            return ColumnLevel.MAYBE;
335        }
336
337        if (columnSources != null && !columnSources.isEmpty()) {
338            return ColumnLevel.NOT_EXISTS;
339        }
340
341        return ColumnLevel.MAYBE;
342    }
343
344    /**
345     * Cleanup follow-up to S15: route table-qualifier compares through
346     * {@link ESQLDataObjectType#dotTable} when the matcher is vendor-aware.
347     * Without this, the 2-arg {@link INameMatcher#matches(String, String)}
348     * defaults to {@code dotColumn} semantics inside {@link VendorNameMatcher},
349     * producing wrong results on BigQuery / MySQL.
350     */
351    private boolean tableNameMatches(String storedName, String tableQualifier) {
352        if (nameMatcher instanceof VendorNameMatcher) {
353            return ((VendorNameMatcher) nameMatcher).matches(storedName, tableQualifier, ESQLDataObjectType.dotTable);
354        }
355        return nameMatcher.matches(storedName, tableQualifier);
356    }
357
358    @Override
359    public ColumnSource resolveColumn(String columnName) {
360        ensureValidated();
361
362        // First check explicit columns from first branch
363        ColumnSource source = super.resolveColumn(columnName);
364        if (source != null) {
365            return source;
366        }
367
368        // Then check inferred columns. Slice S1 + codex round 2: the map is
369        // raw-keyed (= ColumnSource.exposedName), so the exact-match probe
370        // is O(1) for the common case where the same identifier is queried
371        // again. For case-only-different references, the matcher loop walks
372        // values via getExposedName() so quote state is preserved.
373        if (inferredColumns != null) {
374            ColumnSource exact = inferredColumns.get(columnName);
375            if (exact != null) {
376                return exact;
377            }
378            for (ColumnSource entry : inferredColumns.values()) {
379                String exposed = entry != null ? entry.getExposedName() : null;
380                if (exposed != null && nameMatcher.matches(exposed, columnName)) {
381                    return entry;
382                }
383            }
384        }
385
386        // If has star column, auto-infer this column
387        if (hasStarColumn()) {
388            boolean added = addInferredColumn(columnName, 0.8, "auto_inferred_from_outer_reference");
389            if (added && inferredColumns != null) {
390                ColumnSource inferredSource = inferredColumns.get(columnName);
391                if (inferredSource != null) {
392                    return inferredSource;
393                }
394            }
395        }
396
397        return null;
398    }
399
400    @Override
401    public TSelectSqlStatement getSelectStatement() {
402        return unionQuery;
403    }
404
405    @Override
406    public boolean hasStarColumn() {
407        // Returns true if ANY branch has SELECT *
408        for (SubqueryNamespace branchNs : branchNamespaces) {
409            if (branchNs.hasStarColumn()) {
410                return true;
411            }
412        }
413        return false;
414    }
415
416    @Override
417    public boolean supportsDynamicInference() {
418        return hasStarColumn();
419    }
420
421    /**
422     * Slice S4 (plan §5.5): a set-operation's output schema is defined by the
423     * first branch (SQL standard, see class javadoc). The schema is
424     * authoritative once validated AND the first branch projects named
425     * columns; if any branch contains an unresolved {@code SELECT *} we treat
426     * the output as METADATA_UNAVAILABLE because position-based column
427     * matching across branches has not yet completed. S11 will refine this
428     * once arity-mismatch handling lands.
429     */
430    @Override
431    public MetadataState getMetadataState() {
432        ensureValidated();
433        if (columnSources == null || columnSources.isEmpty() || hasStarColumn()) {
434            return MetadataState.METADATA_UNAVAILABLE;
435        }
436        return MetadataState.FOUND;
437    }
438
439    @Override
440    public boolean addInferredColumn(String columnName, double confidence, String evidence) {
441        if (columnName == null || columnName.isEmpty()) {
442            return false;
443        }
444
445        // Initialize maps if needed
446        if (inferredColumns == null) {
447            inferredColumns = new LinkedHashMap<>();
448        }
449        if (inferredColumnNames == null) {
450            inferredColumnNames = new HashSet<>();
451        }
452
453        // Slice S1: dedupe checks must respect per-vendor identifier rules so
454        // that case-only-different inputs do not leave the map with two entries
455        // (one of which is then non-deterministically picked by the matcher
456        // loops in hasColumn / resolveColumn).
457        //
458        // Codex round 2 caveat: the dedupe gate must use {@link
459        // #containsColumnByMatcher} (which walks ColumnSource.exposedName for
460        // value compares) and the storage key must be the original-cased
461        // identifier — NOT the normalized form. Two matcher-distinct
462        // identifiers can normalize to the same key (e.g. Postgres {@code
463        // "mycol"} and unquoted {@code MYCOL} both normalize to {@code
464        // mycol}), so a normalized-keyed map cannot represent both. Raw
465        // (exposedName) keys avoid this collision.
466        if (containsColumnByMatcher(columnSources, columnName)) {
467            return false;
468        }
469        if (containsColumnByMatcher(inferredColumns, columnName)) {
470            return false;
471        }
472
473        // Collect final tables from ALL branches that support dynamic inference (have SELECT *)
474        // For data lineage, we need to track that columns could come from any UNION branch
475        // The formatter will output all candidates for UNION columns when isCandidatesFromUnion is true
476        java.util.List<TTable> candidateTables = new java.util.ArrayList<>();
477        for (SubqueryNamespace branchNs : branchNamespaces) {
478            // Only collect from branches that could have inferred columns
479            if (!branchNs.supportsDynamicInference()) {
480                continue;
481            }
482            branchNs.validate();
483            java.util.List<TTable> branchTables = branchNs.getAllFinalTables();
484            for (TTable table : branchTables) {
485                if (table != null && !candidateTables.contains(table)) {
486                    candidateTables.add(table);
487                }
488            }
489        }
490
491        // Create inferred column source for this union namespace WITH candidate tables
492        ColumnSource source = new ColumnSource(
493            this,
494            columnName,
495            null,
496            confidence,
497            evidence,
498            null,  // overrideTable is null for UNION columns
499            candidateTables.isEmpty() ? null : candidateTables
500        );
501
502        inferredColumns.put(columnName, source);
503        inferredColumnNames.add(columnName);
504
505        if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
506            System.out.println("[UnionNamespace] Added '" + columnName + "' to " + alias +
507                ", propagating to " + branchNamespaces.size() + " branches");
508        }
509
510        // CRITICAL: Propagate to branches that support dynamic inference (have SELECT *)
511        // Only propagate to branches with star columns - branches with explicit column lists
512        // either have the column explicitly or don't have it at all.
513        for (SubqueryNamespace branchNs : branchNamespaces) {
514            // Only propagate to branches that can accept inferred columns
515            if (!branchNs.supportsDynamicInference()) {
516                if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
517                    System.out.println("[UnionNamespace] Skipping branch " + branchNs.getDisplayName() +
518                        " (no star column)");
519                }
520                continue;
521            }
522            if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
523                System.out.println("[UnionNamespace] Propagating '" + columnName + "' to branch " + branchNs.getDisplayName());
524            }
525            branchNs.addInferredColumn(columnName, confidence, evidence + "_union_propagate");
526        }
527
528        return true;
529    }
530
531    @Override
532    public Set<String> getInferredColumns() {
533        if (inferredColumnNames == null) {
534            return Collections.emptySet();
535        }
536        return Collections.unmodifiableSet(inferredColumnNames);
537    }
538
539    /**
540     * Get all branch namespaces.
541     * Useful for external code that needs to iterate over branches.
542     */
543    public List<SubqueryNamespace> getBranchNamespaces() {
544        return Collections.unmodifiableList(branchNamespaces);
545    }
546
547    /**
548     * Get all branch SELECT statements.
549     */
550    public List<TSelectSqlStatement> getAllBranches() {
551        return Collections.unmodifiableList(allBranches);
552    }
553
554    /**
555     * Get the number of UNION branches.
556     */
557    public int getBranchCount() {
558        return allBranches.size();
559    }
560
561    @Override
562    public String toString() {
563        int totalColumns = (columnSources != null ? columnSources.size() : 0) +
564                          (inferredColumns != null ? inferredColumns.size() : 0);
565        return "UnionNamespace(" + getDisplayName() +
566               ", branches=" + allBranches.size() +
567               ", columns=" + totalColumns +
568               ", inferred=" + (inferredColumns != null ? inferredColumns.size() : 0) + ")";
569    }
570}