001package gudusoft.gsqlparser.resolver2.namespace; 002 003import gudusoft.gsqlparser.EExpressionType; 004import gudusoft.gsqlparser.ETableSource; 005import gudusoft.gsqlparser.nodes.TExpression; 006import gudusoft.gsqlparser.nodes.TObjectName; 007import gudusoft.gsqlparser.nodes.TTable; 008import gudusoft.gsqlparser.nodes.TUnnestClause; 009import gudusoft.gsqlparser.resolver2.ColumnLevel; 010import gudusoft.gsqlparser.resolver2.matcher.INameMatcher; 011import gudusoft.gsqlparser.resolver2.model.ColumnSource; 012 013import java.util.ArrayList; 014import java.util.Collections; 015import java.util.HashSet; 016import java.util.LinkedHashMap; 017import java.util.List; 018import java.util.Set; 019 020/** 021 * Namespace representing an UNNEST table expression in BigQuery. 022 * 023 * UNNEST flattens an array into rows. For example: 024 * - UNNEST(['a', 'b', 'c']) creates a virtual table with rows 'a', 'b', 'c' 025 * - SELECT value FROM UNNEST(array_column) - 'value' is an implicit column name 026 * 027 * The UNNEST namespace provides: 028 * 1. An implicit column for the unnested elements (named by alias or 'value') 029 * 2. Support for WITH OFFSET which adds an 'offset' column 030 * 3. Support for STRUCT arrays which expose struct field names 031 */ 032public class UnnestNamespace extends AbstractNamespace { 033 034 private final TTable unnestTable; 035 private final TUnnestClause unnestClause; 036 private final String alias; 037 038 /** The implicit column name for unnested elements */ 039 private String implicitColumnName; 040 041 /** WITH OFFSET column name if present */ 042 private String offsetColumnName; 043 044 /** Inferred columns for dynamic resolution */ 045 private Set<String> inferredColumnNames; 046 047 /** Whether this UNNEST has an explicit alias (e.g., UNNEST(...) AS alias) */ 048 private boolean hasExplicitAlias; 049 050 /** Whether the UNNEST array expression is a subquery (returns a struct that can be expanded) */ 051 private boolean arrayExpressionIsSubquery; 052 053 public UnnestNamespace(TTable unnestTable, String alias, INameMatcher nameMatcher) { 054 super(unnestTable, nameMatcher); 055 this.unnestTable = unnestTable; 056 this.unnestClause = unnestTable.getUnnestClause(); 057 058 // Determine if this is a real alias (from AS clause) or just the table name fallback 059 // For UNNEST tables, we should check if there's an explicit alias 060 String explicitAlias = unnestTable.getAliasName(); 061 if (explicitAlias != null && !explicitAlias.isEmpty()) { 062 // Has explicit AS alias - use it for both table alias and implicit column 063 this.alias = explicitAlias; 064 this.implicitColumnName = explicitAlias; 065 this.hasExplicitAlias = true; 066 } else { 067 // No explicit alias - UNNEST without AS clause 068 // The implicit column name is "value" (BigQuery default for simple arrays) 069 this.alias = alias; // May be null or derived name 070 this.implicitColumnName = "value"; 071 this.hasExplicitAlias = false; 072 } 073 074 // Check for WITH OFFSET 075 if (unnestClause != null && unnestClause.getWithOffset() != null) { 076 if (unnestClause.getWithOffsetAlais() != null && 077 unnestClause.getWithOffsetAlais().getAliasName() != null) { 078 this.offsetColumnName = unnestClause.getWithOffsetAlais().getAliasName().toString(); 079 } else { 080 this.offsetColumnName = "offset"; 081 } 082 } 083 084 // Check if the array expression is a subquery (returns a struct) 085 // When UNNEST operates on a subquery that returns a struct, struct fields become accessible 086 // Example: UNNEST((SELECT struct_column FROM table)) 087 this.arrayExpressionIsSubquery = false; 088 if (unnestClause != null) { 089 TExpression arrayExpr = unnestClause.getArrayExpr(); 090 if (arrayExpr != null && arrayExpr.getExpressionType() == EExpressionType.subquery_t) { 091 this.arrayExpressionIsSubquery = true; 092 } 093 } 094 } 095 096 public UnnestNamespace(TTable unnestTable, String alias) { 097 this(unnestTable, alias, null); 098 } 099 100 @Override 101 public String getDisplayName() { 102 if (alias != null && !alias.isEmpty()) { 103 return alias; 104 } 105 return "(unnest table)"; 106 } 107 108 @Override 109 public TTable getFinalTable() { 110 // UNNEST creates a virtual table, return the TTable representing it 111 return unnestTable; 112 } 113 114 @Override 115 public List<TTable> getAllFinalTables() { 116 List<TTable> tables = new ArrayList<>(); 117 tables.add(unnestTable); 118 return tables; 119 } 120 121 @Override 122 protected void doValidate() { 123 columnSources = new LinkedHashMap<>(); 124 125 // Check for explicit column aliases in table's alias clause 126 // For Presto/Trino syntax: UNNEST(array) AS t (col1, col2) 127 // The column names are in the alias clause's column list 128 boolean hasExplicitColumns = false; 129 if (unnestTable.getAliasClause() != null && 130 unnestTable.getAliasClause().getColumns() != null && 131 unnestTable.getAliasClause().getColumns().size() > 0) { 132 133 hasExplicitColumns = true; 134 for (int i = 0; i < unnestTable.getAliasClause().getColumns().size(); i++) { 135 TObjectName colName = unnestTable.getAliasClause().getColumns().getObjectName(i); 136 if (colName != null) { 137 String name = colName.toString(); 138 ColumnSource source = new ColumnSource( 139 this, 140 name, 141 null, 142 1.0, 143 "unnest_explicit_column_alias" 144 ); 145 columnSources.put(name, source); 146 } 147 } 148 } 149 150 // Add the implicit column for unnested elements (only if no explicit columns) 151 if (!hasExplicitColumns && implicitColumnName != null) { 152 ColumnSource implicitSource = new ColumnSource( 153 this, 154 implicitColumnName, 155 null, 156 1.0, 157 "unnest_implicit_column" 158 ); 159 columnSources.put(implicitColumnName, implicitSource); 160 } 161 162 // Add WITH OFFSET column if present 163 if (offsetColumnName != null) { 164 ColumnSource offsetSource = new ColumnSource( 165 this, 166 offsetColumnName, 167 null, 168 1.0, 169 "unnest_offset_column" 170 ); 171 columnSources.put(offsetColumnName, offsetSource); 172 } 173 174 // Add derived columns from STRUCT types if available 175 if (unnestClause != null && unnestClause.getDerivedColumnList() != null) { 176 for (int i = 0; i < unnestClause.getDerivedColumnList().size(); i++) { 177 TObjectName derivedCol = unnestClause.getDerivedColumnList().getObjectName(i); 178 if (derivedCol != null) { 179 String colName = derivedCol.toString(); 180 ColumnSource derivedSource = new ColumnSource( 181 this, 182 colName, 183 null, 184 1.0, 185 "unnest_struct_field" 186 ); 187 columnSources.put(colName, derivedSource); 188 } 189 } 190 } 191 } 192 193 @Override 194 public boolean hasStarColumn() { 195 // UNNEST tables support star expansion for SELECT * 196 // but have fixed columns (implicit + offset + struct fields) 197 return true; 198 } 199 200 @Override 201 public boolean supportsDynamicInference() { 202 // Only allow dynamic inference for anonymous UNNEST (no explicit alias) that operates 203 // on a subquery returning a struct. In this case, struct fields become accessible 204 // as unqualified columns. 205 // 206 // Example: SELECT field FROM UNNEST((SELECT struct_col FROM table)) 207 // Here 'field' is a struct field that should be inferred from the UNNEST. 208 // 209 // For UNNEST with alias or UNNEST on a simple column, don't allow dynamic inference. 210 // This prevents external variables from being incorrectly attributed to the UNNEST. 211 return !hasExplicitAlias && arrayExpressionIsSubquery; 212 } 213 214 @Override 215 public boolean addInferredColumn(String columnName, double confidence, String evidence) { 216 if (columnName == null || columnName.isEmpty()) { 217 return false; 218 } 219 220 // Only allow inferred columns when dynamic inference is supported. 221 // UNNEST with explicit alias has a fixed set of columns (implicit column, offset, struct fields) 222 // and should NOT accept arbitrary inferred columns from star push-down or enhancement. 223 // This prevents external variables (like function parameters) from being incorrectly 224 // attributed to the UNNEST table. 225 if (!supportsDynamicInference()) { 226 return false; 227 } 228 229 if (inferredColumnNames == null) { 230 inferredColumnNames = new HashSet<>(); 231 } 232 233 // Slice S1: dedupe via matcher-aware helpers so per-vendor identifier 234 // rules (BigQuery columns case-insensitive; Oracle quoted vs unquoted) 235 // govern collision detection. 236 if (containsColumnByMatcher(columnSources, columnName)) { 237 return false; 238 } 239 240 if (containsColumnNameByMatcher(inferredColumnNames, columnName)) { 241 return false; 242 } 243 244 inferredColumnNames.add(columnName); 245 246 // Also add to column sources 247 if (columnSources != null) { 248 ColumnSource source = new ColumnSource( 249 this, 250 columnName, 251 null, 252 confidence, 253 evidence 254 ); 255 columnSources.put(columnName, source); 256 } 257 258 return true; 259 } 260 261 @Override 262 public Set<String> getInferredColumns() { 263 if (inferredColumnNames == null) { 264 return Collections.emptySet(); 265 } 266 return Collections.unmodifiableSet(inferredColumnNames); 267 } 268 269 @Override 270 public ColumnLevel hasColumn(String columnName) { 271 ensureValidated(); 272 273 // Check explicit columns (implicit column, offset, struct fields). 274 if (containsColumnByMatcher(columnSources, columnName)) { 275 return ColumnLevel.EXISTS; 276 } 277 278 // Check inferred columns. Slice S1: route through matcher-aware 279 // helper so case-only-different references don't drop to NOT_EXISTS 280 // on case-insensitive vendors. 281 if (containsColumnNameByMatcher(inferredColumnNames, columnName)) { 282 return ColumnLevel.EXISTS; 283 } 284 285 // For anonymous UNNEST on a subquery (struct expansion), return MAYBE to allow inference. 286 // The actual column will be inferred in resolveColumn(). 287 if (!hasExplicitAlias && arrayExpressionIsSubquery) { 288 return ColumnLevel.MAYBE; 289 } 290 291 // For other UNNEST types, don't auto-infer unknown columns. 292 // Unknown columns should resolve to outer scopes (correlated references) 293 return ColumnLevel.NOT_EXISTS; 294 } 295 296 @Override 297 public ColumnSource resolveColumn(String columnName) { 298 ensureValidated(); 299 300 // Check explicit columns (implicit column, offset, struct fields) 301 ColumnSource source = super.resolveColumn(columnName); 302 if (source != null) { 303 return source; 304 } 305 306 // Check inferred columns (added via addInferredColumn or resolveQualifiedStructField). 307 // Slice S1: matcher-aware containment check; lookup in columnSources is 308 // matcher-aware via super.resolveColumn() above. 309 if (containsColumnNameByMatcher(inferredColumnNames, columnName)) { 310 ColumnSource viaMatcher = super.resolveColumn(columnName); 311 if (viaMatcher != null) { 312 return viaMatcher; 313 } 314 return columnSources != null ? columnSources.get(columnName) : null; 315 } 316 317 // For anonymous UNNEST on a subquery (struct expansion), dynamically infer struct fields. 318 // This allows struct field access without qualification when there's no alias. 319 // Example: SELECT field FROM UNNEST((SELECT struct_col FROM table)) 320 // Here 'field' is a struct field that should be inferred from the UNNEST. 321 if (!hasExplicitAlias && arrayExpressionIsSubquery) { 322 return resolveQualifiedStructField(columnName); 323 } 324 325 // For other UNNEST types, don't auto-infer unknown columns. 326 // Unknown unqualified columns should resolve to outer scopes or be marked as missed. 327 // For qualified struct field access (e.g., alias.field), use resolveQualifiedStructField(). 328 return null; 329 } 330 331 /** 332 * Resolve a qualified struct field reference (e.g., "alias.field"). 333 * This method infers struct fields when no metadata is available. 334 * 335 * <p>Use this method when resolving qualified column references like "x.field" 336 * where "x" is the UNNEST alias. This is appropriate for UNNEST of STRUCT arrays 337 * where the struct fields are accessed via the alias. 338 * 339 * <p>For unqualified column references, use {@link #resolveColumn(String)} which 340 * does NOT infer columns and returns null for unknown columns. 341 * 342 * @param columnName the column/field name (without the table qualifier) 343 * @return ColumnSource for the inferred struct field 344 */ 345 public ColumnSource resolveQualifiedStructField(String columnName) { 346 if (columnName == null || columnName.isEmpty()) { 347 return null; 348 } 349 350 ensureValidated(); 351 352 // Check if already known 353 ColumnSource existing = super.resolveColumn(columnName); 354 if (existing != null) { 355 return existing; 356 } 357 358 // Infer the column as a struct field 359 ColumnSource inferred = new ColumnSource( 360 this, 361 columnName, 362 null, // No definition node 363 0.8, // Moderate confidence - inferred from usage 364 "inferred_unnest_struct_field" 365 ); 366 367 // Cache it for future lookups 368 if (inferredColumnNames == null) { 369 inferredColumnNames = new HashSet<>(); 370 } 371 inferredColumnNames.add(columnName); 372 columnSources.put(columnName, inferred); 373 374 return inferred; 375 } 376 377 /** 378 * Get the array expression being unnested. 379 * This is used to track the source of the unnested data. 380 */ 381 public TExpression getArrayExpression() { 382 return unnestClause != null ? unnestClause.getArrayExpr() : null; 383 } 384 385 /** 386 * Get the implicit column name for the unnested elements. 387 */ 388 public String getImplicitColumnName() { 389 return implicitColumnName; 390 } 391 392 /** 393 * Get the offset column name if WITH OFFSET is present. 394 */ 395 public String getOffsetColumnName() { 396 return offsetColumnName; 397 } 398 399 public TTable getUnnestTable() { 400 return unnestTable; 401 } 402 403 /** 404 * {@inheritDoc} 405 * For UnnestNamespace, returns the TTable representing the UNNEST expression. 406 * This is the immediate source table for columns resolved through this UNNEST. 407 */ 408 @Override 409 public TTable getSourceTable() { 410 return unnestTable; 411 } 412 413 @Override 414 public String toString() { 415 return String.format("UnnestNamespace(%s, implicitCol=%s, offsetCol=%s)", 416 getDisplayName(), 417 implicitColumnName, 418 offsetColumnName); 419 } 420}