001package gudusoft.gsqlparser.resolver2.namespace; 002 003import gudusoft.gsqlparser.EExpressionType; 004import gudusoft.gsqlparser.ETableSource; 005import gudusoft.gsqlparser.nodes.TExpression; 006import gudusoft.gsqlparser.nodes.TObjectName; 007import gudusoft.gsqlparser.nodes.TTable; 008import gudusoft.gsqlparser.nodes.TUnnestClause; 009import gudusoft.gsqlparser.resolver2.ColumnLevel; 010import gudusoft.gsqlparser.resolver2.matcher.INameMatcher; 011import gudusoft.gsqlparser.resolver2.model.ColumnSource; 012 013import java.util.ArrayList; 014import java.util.Collections; 015import java.util.HashSet; 016import java.util.LinkedHashMap; 017import java.util.List; 018import java.util.Set; 019 020/** 021 * Namespace representing an UNNEST table expression in BigQuery. 022 * 023 * UNNEST flattens an array into rows. For example: 024 * - UNNEST(['a', 'b', 'c']) creates a virtual table with rows 'a', 'b', 'c' 025 * - SELECT value FROM UNNEST(array_column) - 'value' is an implicit column name 026 * 027 * The UNNEST namespace provides: 028 * 1. An implicit column for the unnested elements (named by alias or 'value') 029 * 2. Support for WITH OFFSET which adds an 'offset' column 030 * 3. Support for STRUCT arrays which expose struct field names 031 */ 032public class UnnestNamespace extends AbstractNamespace { 033 034 private final TTable unnestTable; 035 private final TUnnestClause unnestClause; 036 private final String alias; 037 038 /** The implicit column name for unnested elements */ 039 private String implicitColumnName; 040 041 /** WITH OFFSET column name if present */ 042 private String offsetColumnName; 043 044 /** Inferred columns for dynamic resolution */ 045 private Set<String> inferredColumnNames; 046 047 /** Whether this UNNEST has an explicit alias (e.g., UNNEST(...) AS alias) */ 048 private boolean hasExplicitAlias; 049 050 /** Whether the UNNEST array expression is a subquery (returns a struct that can be expanded) */ 051 private boolean arrayExpressionIsSubquery; 052 053 public UnnestNamespace(TTable unnestTable, String alias, INameMatcher nameMatcher) { 054 super(unnestTable, nameMatcher); 055 this.unnestTable = unnestTable; 056 this.unnestClause = unnestTable.getUnnestClause(); 057 058 // Determine if this is a real alias (from AS clause) or just the table name fallback 059 // For UNNEST tables, we should check if there's an explicit alias 060 String explicitAlias = unnestTable.getAliasName(); 061 if (explicitAlias != null && !explicitAlias.isEmpty()) { 062 // Has explicit AS alias - use it for both table alias and implicit column 063 this.alias = explicitAlias; 064 this.implicitColumnName = explicitAlias; 065 this.hasExplicitAlias = true; 066 } else { 067 // No explicit alias - UNNEST without AS clause 068 // The implicit column name is "value" (BigQuery default for simple arrays) 069 this.alias = alias; // May be null or derived name 070 this.implicitColumnName = "value"; 071 this.hasExplicitAlias = false; 072 } 073 074 // Check for WITH OFFSET 075 if (unnestClause != null && unnestClause.getWithOffset() != null) { 076 if (unnestClause.getWithOffsetAlais() != null && 077 unnestClause.getWithOffsetAlais().getAliasName() != null) { 078 this.offsetColumnName = unnestClause.getWithOffsetAlais().getAliasName().toString(); 079 } else { 080 this.offsetColumnName = "offset"; 081 } 082 } 083 084 // Check if the array expression is a subquery (returns a struct) 085 // When UNNEST operates on a subquery that returns a struct, struct fields become accessible 086 // Example: UNNEST((SELECT struct_column FROM table)) 087 this.arrayExpressionIsSubquery = false; 088 if (unnestClause != null) { 089 TExpression arrayExpr = unnestClause.getArrayExpr(); 090 if (arrayExpr != null && arrayExpr.getExpressionType() == EExpressionType.subquery_t) { 091 this.arrayExpressionIsSubquery = true; 092 } 093 } 094 } 095 096 public UnnestNamespace(TTable unnestTable, String alias) { 097 this(unnestTable, alias, null); 098 } 099 100 @Override 101 public String getDisplayName() { 102 if (alias != null && !alias.isEmpty()) { 103 return alias; 104 } 105 return "(unnest table)"; 106 } 107 108 @Override 109 public TTable getFinalTable() { 110 // UNNEST creates a virtual table, return the TTable representing it 111 return unnestTable; 112 } 113 114 @Override 115 public List<TTable> getAllFinalTables() { 116 List<TTable> tables = new ArrayList<>(); 117 tables.add(unnestTable); 118 return tables; 119 } 120 121 @Override 122 protected void doValidate() { 123 columnSources = new LinkedHashMap<>(); 124 125 // Check for explicit column aliases in table's alias clause 126 // For Presto/Trino syntax: UNNEST(array) AS t (col1, col2) 127 // The column names are in the alias clause's column list 128 boolean hasExplicitColumns = false; 129 if (unnestTable.getAliasClause() != null && 130 unnestTable.getAliasClause().getColumns() != null && 131 unnestTable.getAliasClause().getColumns().size() > 0) { 132 133 hasExplicitColumns = true; 134 for (int i = 0; i < unnestTable.getAliasClause().getColumns().size(); i++) { 135 TObjectName colName = unnestTable.getAliasClause().getColumns().getObjectName(i); 136 if (colName != null) { 137 String name = colName.toString(); 138 ColumnSource source = new ColumnSource( 139 this, 140 name, 141 null, 142 1.0, 143 "unnest_explicit_column_alias" 144 ); 145 columnSources.put(name, source); 146 } 147 } 148 } 149 150 // Add the implicit column for unnested elements (only if no explicit columns) 151 if (!hasExplicitColumns && implicitColumnName != null) { 152 ColumnSource implicitSource = new ColumnSource( 153 this, 154 implicitColumnName, 155 null, 156 1.0, 157 "unnest_implicit_column" 158 ); 159 columnSources.put(implicitColumnName, implicitSource); 160 } 161 162 // Add WITH OFFSET column if present 163 if (offsetColumnName != null) { 164 ColumnSource offsetSource = new ColumnSource( 165 this, 166 offsetColumnName, 167 null, 168 1.0, 169 "unnest_offset_column" 170 ); 171 columnSources.put(offsetColumnName, offsetSource); 172 } 173 174 // Add derived columns from STRUCT types if available 175 if (unnestClause != null && unnestClause.getDerivedColumnList() != null) { 176 for (int i = 0; i < unnestClause.getDerivedColumnList().size(); i++) { 177 TObjectName derivedCol = unnestClause.getDerivedColumnList().getObjectName(i); 178 if (derivedCol != null) { 179 String colName = derivedCol.toString(); 180 ColumnSource derivedSource = new ColumnSource( 181 this, 182 colName, 183 null, 184 1.0, 185 "unnest_struct_field" 186 ); 187 columnSources.put(colName, derivedSource); 188 } 189 } 190 } 191 } 192 193 @Override 194 public boolean hasStarColumn() { 195 // UNNEST tables support star expansion for SELECT * 196 // but have fixed columns (implicit + offset + struct fields) 197 return true; 198 } 199 200 @Override 201 public boolean supportsDynamicInference() { 202 // Only allow dynamic inference for anonymous UNNEST (no explicit alias) that operates 203 // on a subquery returning a struct. In this case, struct fields become accessible 204 // as unqualified columns. 205 // 206 // Example: SELECT field FROM UNNEST((SELECT struct_col FROM table)) 207 // Here 'field' is a struct field that should be inferred from the UNNEST. 208 // 209 // For UNNEST with alias or UNNEST on a simple column, don't allow dynamic inference. 210 // This prevents external variables from being incorrectly attributed to the UNNEST. 211 return !hasExplicitAlias && arrayExpressionIsSubquery; 212 } 213 214 @Override 215 public boolean addInferredColumn(String columnName, double confidence, String evidence) { 216 if (columnName == null || columnName.isEmpty()) { 217 return false; 218 } 219 220 // Only allow inferred columns when dynamic inference is supported. 221 // UNNEST with explicit alias has a fixed set of columns (implicit column, offset, struct fields) 222 // and should NOT accept arbitrary inferred columns from star push-down or enhancement. 223 // This prevents external variables (like function parameters) from being incorrectly 224 // attributed to the UNNEST table. 225 if (!supportsDynamicInference()) { 226 return false; 227 } 228 229 if (inferredColumnNames == null) { 230 inferredColumnNames = new HashSet<>(); 231 } 232 233 if (columnSources != null && columnSources.containsKey(columnName)) { 234 return false; 235 } 236 237 if (inferredColumnNames.contains(columnName)) { 238 return false; 239 } 240 241 inferredColumnNames.add(columnName); 242 243 // Also add to column sources 244 if (columnSources != null) { 245 ColumnSource source = new ColumnSource( 246 this, 247 columnName, 248 null, 249 confidence, 250 evidence 251 ); 252 columnSources.put(columnName, source); 253 } 254 255 return true; 256 } 257 258 @Override 259 public Set<String> getInferredColumns() { 260 if (inferredColumnNames == null) { 261 return Collections.emptySet(); 262 } 263 return Collections.unmodifiableSet(inferredColumnNames); 264 } 265 266 @Override 267 public ColumnLevel hasColumn(String columnName) { 268 ensureValidated(); 269 270 // Check explicit columns (implicit column, offset, struct fields) 271 if (columnSources != null) { 272 for (String existingCol : columnSources.keySet()) { 273 if (nameMatcher.matches(existingCol, columnName)) { 274 return ColumnLevel.EXISTS; 275 } 276 } 277 } 278 279 // Check inferred columns 280 if (inferredColumnNames != null && inferredColumnNames.contains(columnName)) { 281 return ColumnLevel.EXISTS; 282 } 283 284 // For anonymous UNNEST on a subquery (struct expansion), return MAYBE to allow inference. 285 // The actual column will be inferred in resolveColumn(). 286 if (!hasExplicitAlias && arrayExpressionIsSubquery) { 287 return ColumnLevel.MAYBE; 288 } 289 290 // For other UNNEST types, don't auto-infer unknown columns. 291 // Unknown columns should resolve to outer scopes (correlated references) 292 return ColumnLevel.NOT_EXISTS; 293 } 294 295 @Override 296 public ColumnSource resolveColumn(String columnName) { 297 ensureValidated(); 298 299 // Check explicit columns (implicit column, offset, struct fields) 300 ColumnSource source = super.resolveColumn(columnName); 301 if (source != null) { 302 return source; 303 } 304 305 // Check inferred columns (added via addInferredColumn or resolveQualifiedStructField) 306 if (inferredColumnNames != null && inferredColumnNames.contains(columnName)) { 307 return columnSources.get(columnName); 308 } 309 310 // For anonymous UNNEST on a subquery (struct expansion), dynamically infer struct fields. 311 // This allows struct field access without qualification when there's no alias. 312 // Example: SELECT field FROM UNNEST((SELECT struct_col FROM table)) 313 // Here 'field' is a struct field that should be inferred from the UNNEST. 314 if (!hasExplicitAlias && arrayExpressionIsSubquery) { 315 return resolveQualifiedStructField(columnName); 316 } 317 318 // For other UNNEST types, don't auto-infer unknown columns. 319 // Unknown unqualified columns should resolve to outer scopes or be marked as missed. 320 // For qualified struct field access (e.g., alias.field), use resolveQualifiedStructField(). 321 return null; 322 } 323 324 /** 325 * Resolve a qualified struct field reference (e.g., "alias.field"). 326 * This method infers struct fields when no metadata is available. 327 * 328 * <p>Use this method when resolving qualified column references like "x.field" 329 * where "x" is the UNNEST alias. This is appropriate for UNNEST of STRUCT arrays 330 * where the struct fields are accessed via the alias. 331 * 332 * <p>For unqualified column references, use {@link #resolveColumn(String)} which 333 * does NOT infer columns and returns null for unknown columns. 334 * 335 * @param columnName the column/field name (without the table qualifier) 336 * @return ColumnSource for the inferred struct field 337 */ 338 public ColumnSource resolveQualifiedStructField(String columnName) { 339 if (columnName == null || columnName.isEmpty()) { 340 return null; 341 } 342 343 ensureValidated(); 344 345 // Check if already known 346 ColumnSource existing = super.resolveColumn(columnName); 347 if (existing != null) { 348 return existing; 349 } 350 351 // Infer the column as a struct field 352 ColumnSource inferred = new ColumnSource( 353 this, 354 columnName, 355 null, // No definition node 356 0.8, // Moderate confidence - inferred from usage 357 "inferred_unnest_struct_field" 358 ); 359 360 // Cache it for future lookups 361 if (inferredColumnNames == null) { 362 inferredColumnNames = new HashSet<>(); 363 } 364 inferredColumnNames.add(columnName); 365 columnSources.put(columnName, inferred); 366 367 return inferred; 368 } 369 370 /** 371 * Get the array expression being unnested. 372 * This is used to track the source of the unnested data. 373 */ 374 public TExpression getArrayExpression() { 375 return unnestClause != null ? unnestClause.getArrayExpr() : null; 376 } 377 378 /** 379 * Get the implicit column name for the unnested elements. 380 */ 381 public String getImplicitColumnName() { 382 return implicitColumnName; 383 } 384 385 /** 386 * Get the offset column name if WITH OFFSET is present. 387 */ 388 public String getOffsetColumnName() { 389 return offsetColumnName; 390 } 391 392 public TTable getUnnestTable() { 393 return unnestTable; 394 } 395 396 /** 397 * {@inheritDoc} 398 * For UnnestNamespace, returns the TTable representing the UNNEST expression. 399 * This is the immediate source table for columns resolved through this UNNEST. 400 */ 401 @Override 402 public TTable getSourceTable() { 403 return unnestTable; 404 } 405 406 @Override 407 public String toString() { 408 return String.format("UnnestNamespace(%s, implicitCol=%s, offsetCol=%s)", 409 getDisplayName(), 410 implicitColumnName, 411 offsetColumnName); 412 } 413}