001package gudusoft.gsqlparser.resolver2.namespace;
002
003import gudusoft.gsqlparser.EExpressionType;
004import gudusoft.gsqlparser.ETableSource;
005import gudusoft.gsqlparser.nodes.TExpression;
006import gudusoft.gsqlparser.nodes.TObjectName;
007import gudusoft.gsqlparser.nodes.TTable;
008import gudusoft.gsqlparser.nodes.TUnnestClause;
009import gudusoft.gsqlparser.resolver2.ColumnLevel;
010import gudusoft.gsqlparser.resolver2.matcher.INameMatcher;
011import gudusoft.gsqlparser.resolver2.model.ColumnSource;
012
013import java.util.ArrayList;
014import java.util.Collections;
015import java.util.HashSet;
016import java.util.LinkedHashMap;
017import java.util.List;
018import java.util.Set;
019
020/**
021 * Namespace representing an UNNEST table expression in BigQuery.
022 *
023 * UNNEST flattens an array into rows. For example:
024 * - UNNEST(['a', 'b', 'c']) creates a virtual table with rows 'a', 'b', 'c'
025 * - SELECT value FROM UNNEST(array_column) - 'value' is an implicit column name
026 *
027 * The UNNEST namespace provides:
028 * 1. An implicit column for the unnested elements (named by alias or 'value')
029 * 2. Support for WITH OFFSET which adds an 'offset' column
030 * 3. Support for STRUCT arrays which expose struct field names
031 */
032public class UnnestNamespace extends AbstractNamespace {
033
034    private final TTable unnestTable;
035    private final TUnnestClause unnestClause;
036    private final String alias;
037
038    /** The implicit column name for unnested elements */
039    private String implicitColumnName;
040
041    /** WITH OFFSET column name if present */
042    private String offsetColumnName;
043
044    /** Inferred columns for dynamic resolution */
045    private Set<String> inferredColumnNames;
046
047    /** Whether this UNNEST has an explicit alias (e.g., UNNEST(...) AS alias) */
048    private boolean hasExplicitAlias;
049
050    /** Whether the UNNEST array expression is a subquery (returns a struct that can be expanded) */
051    private boolean arrayExpressionIsSubquery;
052
053    public UnnestNamespace(TTable unnestTable, String alias, INameMatcher nameMatcher) {
054        super(unnestTable, nameMatcher);
055        this.unnestTable = unnestTable;
056        this.unnestClause = unnestTable.getUnnestClause();
057
058        // Determine if this is a real alias (from AS clause) or just the table name fallback
059        // For UNNEST tables, we should check if there's an explicit alias
060        String explicitAlias = unnestTable.getAliasName();
061        if (explicitAlias != null && !explicitAlias.isEmpty()) {
062            // Has explicit AS alias - use it for both table alias and implicit column
063            this.alias = explicitAlias;
064            this.implicitColumnName = explicitAlias;
065            this.hasExplicitAlias = true;
066        } else {
067            // No explicit alias - UNNEST without AS clause
068            // The implicit column name is "value" (BigQuery default for simple arrays)
069            this.alias = alias; // May be null or derived name
070            this.implicitColumnName = "value";
071            this.hasExplicitAlias = false;
072        }
073
074        // Check for WITH OFFSET
075        if (unnestClause != null && unnestClause.getWithOffset() != null) {
076            if (unnestClause.getWithOffsetAlais() != null &&
077                unnestClause.getWithOffsetAlais().getAliasName() != null) {
078                this.offsetColumnName = unnestClause.getWithOffsetAlais().getAliasName().toString();
079            } else {
080                this.offsetColumnName = "offset";
081            }
082        }
083
084        // Check if the array expression is a subquery (returns a struct)
085        // When UNNEST operates on a subquery that returns a struct, struct fields become accessible
086        // Example: UNNEST((SELECT struct_column FROM table))
087        this.arrayExpressionIsSubquery = false;
088        if (unnestClause != null) {
089            TExpression arrayExpr = unnestClause.getArrayExpr();
090            if (arrayExpr != null && arrayExpr.getExpressionType() == EExpressionType.subquery_t) {
091                this.arrayExpressionIsSubquery = true;
092            }
093        }
094    }
095
096    public UnnestNamespace(TTable unnestTable, String alias) {
097        this(unnestTable, alias, null);
098    }
099
100    @Override
101    public String getDisplayName() {
102        if (alias != null && !alias.isEmpty()) {
103            return alias;
104        }
105        return "(unnest table)";
106    }
107
108    @Override
109    public TTable getFinalTable() {
110        // UNNEST creates a virtual table, return the TTable representing it
111        return unnestTable;
112    }
113
114    @Override
115    public List<TTable> getAllFinalTables() {
116        List<TTable> tables = new ArrayList<>();
117        tables.add(unnestTable);
118        return tables;
119    }
120
121    @Override
122    protected void doValidate() {
123        columnSources = new LinkedHashMap<>();
124
125        // Check for explicit column aliases in table's alias clause
126        // For Presto/Trino syntax: UNNEST(array) AS t (col1, col2)
127        // The column names are in the alias clause's column list
128        boolean hasExplicitColumns = false;
129        if (unnestTable.getAliasClause() != null &&
130            unnestTable.getAliasClause().getColumns() != null &&
131            unnestTable.getAliasClause().getColumns().size() > 0) {
132
133            hasExplicitColumns = true;
134            for (int i = 0; i < unnestTable.getAliasClause().getColumns().size(); i++) {
135                TObjectName colName = unnestTable.getAliasClause().getColumns().getObjectName(i);
136                if (colName != null) {
137                    String name = colName.toString();
138                    ColumnSource source = new ColumnSource(
139                        this,
140                        name,
141                        null,
142                        1.0,
143                        "unnest_explicit_column_alias"
144                    );
145                    columnSources.put(name, source);
146                }
147            }
148        }
149
150        // Add the implicit column for unnested elements (only if no explicit columns)
151        if (!hasExplicitColumns && implicitColumnName != null) {
152            ColumnSource implicitSource = new ColumnSource(
153                this,
154                implicitColumnName,
155                null,
156                1.0,
157                "unnest_implicit_column"
158            );
159            columnSources.put(implicitColumnName, implicitSource);
160        }
161
162        // Add WITH OFFSET column if present
163        if (offsetColumnName != null) {
164            ColumnSource offsetSource = new ColumnSource(
165                this,
166                offsetColumnName,
167                null,
168                1.0,
169                "unnest_offset_column"
170            );
171            columnSources.put(offsetColumnName, offsetSource);
172        }
173
174        // Add derived columns from STRUCT types if available
175        if (unnestClause != null && unnestClause.getDerivedColumnList() != null) {
176            for (int i = 0; i < unnestClause.getDerivedColumnList().size(); i++) {
177                TObjectName derivedCol = unnestClause.getDerivedColumnList().getObjectName(i);
178                if (derivedCol != null) {
179                    String colName = derivedCol.toString();
180                    ColumnSource derivedSource = new ColumnSource(
181                        this,
182                        colName,
183                        null,
184                        1.0,
185                        "unnest_struct_field"
186                    );
187                    columnSources.put(colName, derivedSource);
188                }
189            }
190        }
191    }
192
193    @Override
194    public boolean hasStarColumn() {
195        // UNNEST tables support star expansion for SELECT *
196        // but have fixed columns (implicit + offset + struct fields)
197        return true;
198    }
199
200    @Override
201    public boolean supportsDynamicInference() {
202        // Only allow dynamic inference for anonymous UNNEST (no explicit alias) that operates
203        // on a subquery returning a struct. In this case, struct fields become accessible
204        // as unqualified columns.
205        //
206        // Example: SELECT field FROM UNNEST((SELECT struct_col FROM table))
207        // Here 'field' is a struct field that should be inferred from the UNNEST.
208        //
209        // For UNNEST with alias or UNNEST on a simple column, don't allow dynamic inference.
210        // This prevents external variables from being incorrectly attributed to the UNNEST.
211        return !hasExplicitAlias && arrayExpressionIsSubquery;
212    }
213
214    @Override
215    public boolean addInferredColumn(String columnName, double confidence, String evidence) {
216        if (columnName == null || columnName.isEmpty()) {
217            return false;
218        }
219
220        // Only allow inferred columns when dynamic inference is supported.
221        // UNNEST with explicit alias has a fixed set of columns (implicit column, offset, struct fields)
222        // and should NOT accept arbitrary inferred columns from star push-down or enhancement.
223        // This prevents external variables (like function parameters) from being incorrectly
224        // attributed to the UNNEST table.
225        if (!supportsDynamicInference()) {
226            return false;
227        }
228
229        if (inferredColumnNames == null) {
230            inferredColumnNames = new HashSet<>();
231        }
232
233        if (columnSources != null && columnSources.containsKey(columnName)) {
234            return false;
235        }
236
237        if (inferredColumnNames.contains(columnName)) {
238            return false;
239        }
240
241        inferredColumnNames.add(columnName);
242
243        // Also add to column sources
244        if (columnSources != null) {
245            ColumnSource source = new ColumnSource(
246                this,
247                columnName,
248                null,
249                confidence,
250                evidence
251            );
252            columnSources.put(columnName, source);
253        }
254
255        return true;
256    }
257
258    @Override
259    public Set<String> getInferredColumns() {
260        if (inferredColumnNames == null) {
261            return Collections.emptySet();
262        }
263        return Collections.unmodifiableSet(inferredColumnNames);
264    }
265
266    @Override
267    public ColumnLevel hasColumn(String columnName) {
268        ensureValidated();
269
270        // Check explicit columns (implicit column, offset, struct fields)
271        if (columnSources != null) {
272            for (String existingCol : columnSources.keySet()) {
273                if (nameMatcher.matches(existingCol, columnName)) {
274                    return ColumnLevel.EXISTS;
275                }
276            }
277        }
278
279        // Check inferred columns
280        if (inferredColumnNames != null && inferredColumnNames.contains(columnName)) {
281            return ColumnLevel.EXISTS;
282        }
283
284        // For anonymous UNNEST on a subquery (struct expansion), return MAYBE to allow inference.
285        // The actual column will be inferred in resolveColumn().
286        if (!hasExplicitAlias && arrayExpressionIsSubquery) {
287            return ColumnLevel.MAYBE;
288        }
289
290        // For other UNNEST types, don't auto-infer unknown columns.
291        // Unknown columns should resolve to outer scopes (correlated references)
292        return ColumnLevel.NOT_EXISTS;
293    }
294
295    @Override
296    public ColumnSource resolveColumn(String columnName) {
297        ensureValidated();
298
299        // Check explicit columns (implicit column, offset, struct fields)
300        ColumnSource source = super.resolveColumn(columnName);
301        if (source != null) {
302            return source;
303        }
304
305        // Check inferred columns (added via addInferredColumn or resolveQualifiedStructField)
306        if (inferredColumnNames != null && inferredColumnNames.contains(columnName)) {
307            return columnSources.get(columnName);
308        }
309
310        // For anonymous UNNEST on a subquery (struct expansion), dynamically infer struct fields.
311        // This allows struct field access without qualification when there's no alias.
312        // Example: SELECT field FROM UNNEST((SELECT struct_col FROM table))
313        // Here 'field' is a struct field that should be inferred from the UNNEST.
314        if (!hasExplicitAlias && arrayExpressionIsSubquery) {
315            return resolveQualifiedStructField(columnName);
316        }
317
318        // For other UNNEST types, don't auto-infer unknown columns.
319        // Unknown unqualified columns should resolve to outer scopes or be marked as missed.
320        // For qualified struct field access (e.g., alias.field), use resolveQualifiedStructField().
321        return null;
322    }
323
324    /**
325     * Resolve a qualified struct field reference (e.g., "alias.field").
326     * This method infers struct fields when no metadata is available.
327     *
328     * <p>Use this method when resolving qualified column references like "x.field"
329     * where "x" is the UNNEST alias. This is appropriate for UNNEST of STRUCT arrays
330     * where the struct fields are accessed via the alias.
331     *
332     * <p>For unqualified column references, use {@link #resolveColumn(String)} which
333     * does NOT infer columns and returns null for unknown columns.
334     *
335     * @param columnName the column/field name (without the table qualifier)
336     * @return ColumnSource for the inferred struct field
337     */
338    public ColumnSource resolveQualifiedStructField(String columnName) {
339        if (columnName == null || columnName.isEmpty()) {
340            return null;
341        }
342
343        ensureValidated();
344
345        // Check if already known
346        ColumnSource existing = super.resolveColumn(columnName);
347        if (existing != null) {
348            return existing;
349        }
350
351        // Infer the column as a struct field
352        ColumnSource inferred = new ColumnSource(
353            this,
354            columnName,
355            null,  // No definition node
356            0.8,   // Moderate confidence - inferred from usage
357            "inferred_unnest_struct_field"
358        );
359
360        // Cache it for future lookups
361        if (inferredColumnNames == null) {
362            inferredColumnNames = new HashSet<>();
363        }
364        inferredColumnNames.add(columnName);
365        columnSources.put(columnName, inferred);
366
367        return inferred;
368    }
369
370    /**
371     * Get the array expression being unnested.
372     * This is used to track the source of the unnested data.
373     */
374    public TExpression getArrayExpression() {
375        return unnestClause != null ? unnestClause.getArrayExpr() : null;
376    }
377
378    /**
379     * Get the implicit column name for the unnested elements.
380     */
381    public String getImplicitColumnName() {
382        return implicitColumnName;
383    }
384
385    /**
386     * Get the offset column name if WITH OFFSET is present.
387     */
388    public String getOffsetColumnName() {
389        return offsetColumnName;
390    }
391
392    public TTable getUnnestTable() {
393        return unnestTable;
394    }
395
396    /**
397     * {@inheritDoc}
398     * For UnnestNamespace, returns the TTable representing the UNNEST expression.
399     * This is the immediate source table for columns resolved through this UNNEST.
400     */
401    @Override
402    public TTable getSourceTable() {
403        return unnestTable;
404    }
405
406    @Override
407    public String toString() {
408        return String.format("UnnestNamespace(%s, implicitCol=%s, offsetCol=%s)",
409            getDisplayName(),
410            implicitColumnName,
411            offsetColumnName);
412    }
413}