001package gudusoft.gsqlparser.resolver2.namespace;
002
003import gudusoft.gsqlparser.EExpressionType;
004import gudusoft.gsqlparser.ETableSource;
005import gudusoft.gsqlparser.nodes.TExpression;
006import gudusoft.gsqlparser.nodes.TObjectName;
007import gudusoft.gsqlparser.nodes.TTable;
008import gudusoft.gsqlparser.nodes.TUnnestClause;
009import gudusoft.gsqlparser.resolver2.ColumnLevel;
010import gudusoft.gsqlparser.resolver2.matcher.INameMatcher;
011import gudusoft.gsqlparser.resolver2.model.ColumnSource;
012
013import java.util.ArrayList;
014import java.util.Collections;
015import java.util.HashSet;
016import java.util.LinkedHashMap;
017import java.util.List;
018import java.util.Set;
019
020/**
021 * Namespace representing an UNNEST table expression in BigQuery.
022 *
023 * UNNEST flattens an array into rows. For example:
024 * - UNNEST(['a', 'b', 'c']) creates a virtual table with rows 'a', 'b', 'c'
025 * - SELECT value FROM UNNEST(array_column) - 'value' is an implicit column name
026 *
027 * The UNNEST namespace provides:
028 * 1. An implicit column for the unnested elements (named by alias or 'value')
029 * 2. Support for WITH OFFSET which adds an 'offset' column
030 * 3. Support for STRUCT arrays which expose struct field names
031 */
032public class UnnestNamespace extends AbstractNamespace {
033
034    private final TTable unnestTable;
035    private final TUnnestClause unnestClause;
036    private final String alias;
037
038    /** The implicit column name for unnested elements */
039    private String implicitColumnName;
040
041    /** WITH OFFSET column name if present */
042    private String offsetColumnName;
043
044    /** Inferred columns for dynamic resolution */
045    private Set<String> inferredColumnNames;
046
047    /** Whether this UNNEST has an explicit alias (e.g., UNNEST(...) AS alias) */
048    private boolean hasExplicitAlias;
049
050    /** Whether the UNNEST array expression is a subquery (returns a struct that can be expanded) */
051    private boolean arrayExpressionIsSubquery;
052
053    public UnnestNamespace(TTable unnestTable, String alias, INameMatcher nameMatcher) {
054        super(unnestTable, nameMatcher);
055        this.unnestTable = unnestTable;
056        this.unnestClause = unnestTable.getUnnestClause();
057
058        // Determine if this is a real alias (from AS clause) or just the table name fallback
059        // For UNNEST tables, we should check if there's an explicit alias
060        String explicitAlias = unnestTable.getAliasName();
061        if (explicitAlias != null && !explicitAlias.isEmpty()) {
062            // Has explicit AS alias - use it for both table alias and implicit column
063            this.alias = explicitAlias;
064            this.implicitColumnName = explicitAlias;
065            this.hasExplicitAlias = true;
066        } else {
067            // No explicit alias - UNNEST without AS clause
068            // The implicit column name is "value" (BigQuery default for simple arrays)
069            this.alias = alias; // May be null or derived name
070            this.implicitColumnName = "value";
071            this.hasExplicitAlias = false;
072        }
073
074        // Check for WITH OFFSET
075        if (unnestClause != null && unnestClause.getWithOffset() != null) {
076            if (unnestClause.getWithOffsetAlais() != null &&
077                unnestClause.getWithOffsetAlais().getAliasName() != null) {
078                this.offsetColumnName = unnestClause.getWithOffsetAlais().getAliasName().toString();
079            } else {
080                this.offsetColumnName = "offset";
081            }
082        }
083
084        // Check if the array expression is a subquery (returns a struct)
085        // When UNNEST operates on a subquery that returns a struct, struct fields become accessible
086        // Example: UNNEST((SELECT struct_column FROM table))
087        this.arrayExpressionIsSubquery = false;
088        if (unnestClause != null) {
089            TExpression arrayExpr = unnestClause.getArrayExpr();
090            if (arrayExpr != null && arrayExpr.getExpressionType() == EExpressionType.subquery_t) {
091                this.arrayExpressionIsSubquery = true;
092            }
093        }
094    }
095
096    public UnnestNamespace(TTable unnestTable, String alias) {
097        this(unnestTable, alias, null);
098    }
099
100    @Override
101    public String getDisplayName() {
102        if (alias != null && !alias.isEmpty()) {
103            return alias;
104        }
105        return "(unnest table)";
106    }
107
108    @Override
109    public TTable getFinalTable() {
110        // UNNEST creates a virtual table, return the TTable representing it
111        return unnestTable;
112    }
113
114    @Override
115    public List<TTable> getAllFinalTables() {
116        List<TTable> tables = new ArrayList<>();
117        tables.add(unnestTable);
118        return tables;
119    }
120
121    @Override
122    protected void doValidate() {
123        columnSources = new LinkedHashMap<>();
124
125        // Check for explicit column aliases in table's alias clause
126        // For Presto/Trino syntax: UNNEST(array) AS t (col1, col2)
127        // The column names are in the alias clause's column list
128        boolean hasExplicitColumns = false;
129        if (unnestTable.getAliasClause() != null &&
130            unnestTable.getAliasClause().getColumns() != null &&
131            unnestTable.getAliasClause().getColumns().size() > 0) {
132
133            hasExplicitColumns = true;
134            for (int i = 0; i < unnestTable.getAliasClause().getColumns().size(); i++) {
135                TObjectName colName = unnestTable.getAliasClause().getColumns().getObjectName(i);
136                if (colName != null) {
137                    String name = colName.toString();
138                    ColumnSource source = new ColumnSource(
139                        this,
140                        name,
141                        null,
142                        1.0,
143                        "unnest_explicit_column_alias"
144                    );
145                    columnSources.put(name, source);
146                }
147            }
148        }
149
150        // Add the implicit column for unnested elements (only if no explicit columns)
151        if (!hasExplicitColumns && implicitColumnName != null) {
152            ColumnSource implicitSource = new ColumnSource(
153                this,
154                implicitColumnName,
155                null,
156                1.0,
157                "unnest_implicit_column"
158            );
159            columnSources.put(implicitColumnName, implicitSource);
160        }
161
162        // Add WITH OFFSET column if present
163        if (offsetColumnName != null) {
164            ColumnSource offsetSource = new ColumnSource(
165                this,
166                offsetColumnName,
167                null,
168                1.0,
169                "unnest_offset_column"
170            );
171            columnSources.put(offsetColumnName, offsetSource);
172        }
173
174        // Add derived columns from STRUCT types if available
175        if (unnestClause != null && unnestClause.getDerivedColumnList() != null) {
176            for (int i = 0; i < unnestClause.getDerivedColumnList().size(); i++) {
177                TObjectName derivedCol = unnestClause.getDerivedColumnList().getObjectName(i);
178                if (derivedCol != null) {
179                    String colName = derivedCol.toString();
180                    ColumnSource derivedSource = new ColumnSource(
181                        this,
182                        colName,
183                        null,
184                        1.0,
185                        "unnest_struct_field"
186                    );
187                    columnSources.put(colName, derivedSource);
188                }
189            }
190        }
191    }
192
193    @Override
194    public boolean hasStarColumn() {
195        // UNNEST tables support star expansion for SELECT *
196        // but have fixed columns (implicit + offset + struct fields)
197        return true;
198    }
199
200    @Override
201    public boolean supportsDynamicInference() {
202        // Only allow dynamic inference for anonymous UNNEST (no explicit alias) that operates
203        // on a subquery returning a struct. In this case, struct fields become accessible
204        // as unqualified columns.
205        //
206        // Example: SELECT field FROM UNNEST((SELECT struct_col FROM table))
207        // Here 'field' is a struct field that should be inferred from the UNNEST.
208        //
209        // For UNNEST with alias or UNNEST on a simple column, don't allow dynamic inference.
210        // This prevents external variables from being incorrectly attributed to the UNNEST.
211        return !hasExplicitAlias && arrayExpressionIsSubquery;
212    }
213
214    @Override
215    public boolean addInferredColumn(String columnName, double confidence, String evidence) {
216        if (columnName == null || columnName.isEmpty()) {
217            return false;
218        }
219
220        // Only allow inferred columns when dynamic inference is supported.
221        // UNNEST with explicit alias has a fixed set of columns (implicit column, offset, struct fields)
222        // and should NOT accept arbitrary inferred columns from star push-down or enhancement.
223        // This prevents external variables (like function parameters) from being incorrectly
224        // attributed to the UNNEST table.
225        if (!supportsDynamicInference()) {
226            return false;
227        }
228
229        if (inferredColumnNames == null) {
230            inferredColumnNames = new HashSet<>();
231        }
232
233        // Slice S1: dedupe via matcher-aware helpers so per-vendor identifier
234        // rules (BigQuery columns case-insensitive; Oracle quoted vs unquoted)
235        // govern collision detection.
236        if (containsColumnByMatcher(columnSources, columnName)) {
237            return false;
238        }
239
240        if (containsColumnNameByMatcher(inferredColumnNames, columnName)) {
241            return false;
242        }
243
244        inferredColumnNames.add(columnName);
245
246        // Also add to column sources
247        if (columnSources != null) {
248            ColumnSource source = new ColumnSource(
249                this,
250                columnName,
251                null,
252                confidence,
253                evidence
254            );
255            columnSources.put(columnName, source);
256        }
257
258        return true;
259    }
260
261    @Override
262    public Set<String> getInferredColumns() {
263        if (inferredColumnNames == null) {
264            return Collections.emptySet();
265        }
266        return Collections.unmodifiableSet(inferredColumnNames);
267    }
268
269    @Override
270    public ColumnLevel hasColumn(String columnName) {
271        ensureValidated();
272
273        // Check explicit columns (implicit column, offset, struct fields).
274        if (containsColumnByMatcher(columnSources, columnName)) {
275            return ColumnLevel.EXISTS;
276        }
277
278        // Check inferred columns. Slice S1: route through matcher-aware
279        // helper so case-only-different references don't drop to NOT_EXISTS
280        // on case-insensitive vendors.
281        if (containsColumnNameByMatcher(inferredColumnNames, columnName)) {
282            return ColumnLevel.EXISTS;
283        }
284
285        // For anonymous UNNEST on a subquery (struct expansion), return MAYBE to allow inference.
286        // The actual column will be inferred in resolveColumn().
287        if (!hasExplicitAlias && arrayExpressionIsSubquery) {
288            return ColumnLevel.MAYBE;
289        }
290
291        // For other UNNEST types, don't auto-infer unknown columns.
292        // Unknown columns should resolve to outer scopes (correlated references)
293        return ColumnLevel.NOT_EXISTS;
294    }
295
296    @Override
297    public ColumnSource resolveColumn(String columnName) {
298        ensureValidated();
299
300        // Check explicit columns (implicit column, offset, struct fields)
301        ColumnSource source = super.resolveColumn(columnName);
302        if (source != null) {
303            return source;
304        }
305
306        // Check inferred columns (added via addInferredColumn or resolveQualifiedStructField).
307        // Slice S1: matcher-aware containment check; lookup in columnSources is
308        // matcher-aware via super.resolveColumn() above.
309        if (containsColumnNameByMatcher(inferredColumnNames, columnName)) {
310            ColumnSource viaMatcher = super.resolveColumn(columnName);
311            if (viaMatcher != null) {
312                return viaMatcher;
313            }
314            return columnSources != null ? columnSources.get(columnName) : null;
315        }
316
317        // For anonymous UNNEST on a subquery (struct expansion), dynamically infer struct fields.
318        // This allows struct field access without qualification when there's no alias.
319        // Example: SELECT field FROM UNNEST((SELECT struct_col FROM table))
320        // Here 'field' is a struct field that should be inferred from the UNNEST.
321        if (!hasExplicitAlias && arrayExpressionIsSubquery) {
322            return resolveQualifiedStructField(columnName);
323        }
324
325        // For other UNNEST types, don't auto-infer unknown columns.
326        // Unknown unqualified columns should resolve to outer scopes or be marked as missed.
327        // For qualified struct field access (e.g., alias.field), use resolveQualifiedStructField().
328        return null;
329    }
330
331    /**
332     * Resolve a qualified struct field reference (e.g., "alias.field").
333     * This method infers struct fields when no metadata is available.
334     *
335     * <p>Use this method when resolving qualified column references like "x.field"
336     * where "x" is the UNNEST alias. This is appropriate for UNNEST of STRUCT arrays
337     * where the struct fields are accessed via the alias.
338     *
339     * <p>For unqualified column references, use {@link #resolveColumn(String)} which
340     * does NOT infer columns and returns null for unknown columns.
341     *
342     * @param columnName the column/field name (without the table qualifier)
343     * @return ColumnSource for the inferred struct field
344     */
345    public ColumnSource resolveQualifiedStructField(String columnName) {
346        if (columnName == null || columnName.isEmpty()) {
347            return null;
348        }
349
350        ensureValidated();
351
352        // Check if already known
353        ColumnSource existing = super.resolveColumn(columnName);
354        if (existing != null) {
355            return existing;
356        }
357
358        // Infer the column as a struct field
359        ColumnSource inferred = new ColumnSource(
360            this,
361            columnName,
362            null,  // No definition node
363            0.8,   // Moderate confidence - inferred from usage
364            "inferred_unnest_struct_field"
365        );
366
367        // Cache it for future lookups
368        if (inferredColumnNames == null) {
369            inferredColumnNames = new HashSet<>();
370        }
371        inferredColumnNames.add(columnName);
372        columnSources.put(columnName, inferred);
373
374        return inferred;
375    }
376
377    /**
378     * Get the array expression being unnested.
379     * This is used to track the source of the unnested data.
380     */
381    public TExpression getArrayExpression() {
382        return unnestClause != null ? unnestClause.getArrayExpr() : null;
383    }
384
385    /**
386     * Get the implicit column name for the unnested elements.
387     */
388    public String getImplicitColumnName() {
389        return implicitColumnName;
390    }
391
392    /**
393     * Get the offset column name if WITH OFFSET is present.
394     */
395    public String getOffsetColumnName() {
396        return offsetColumnName;
397    }
398
399    public TTable getUnnestTable() {
400        return unnestTable;
401    }
402
403    /**
404     * {@inheritDoc}
405     * For UnnestNamespace, returns the TTable representing the UNNEST expression.
406     * This is the immediate source table for columns resolved through this UNNEST.
407     */
408    @Override
409    public TTable getSourceTable() {
410        return unnestTable;
411    }
412
413    @Override
414    public String toString() {
415        return String.format("UnnestNamespace(%s, implicitCol=%s, offsetCol=%s)",
416            getDisplayName(),
417            implicitColumnName,
418            offsetColumnName);
419    }
420}