package gudusoft.gsqlparser.resolver2.namespace;

import gudusoft.gsqlparser.nodes.TResultColumn;
import gudusoft.gsqlparser.nodes.TResultColumnList;
import gudusoft.gsqlparser.nodes.TTable;
import gudusoft.gsqlparser.resolver2.ColumnLevel;
import gudusoft.gsqlparser.resolver2.matcher.INameMatcher;
import gudusoft.gsqlparser.resolver2.model.ColumnSource;
import gudusoft.gsqlparser.stmt.TSelectSqlStatement;

import java.util.*;

/**
 * Namespace representing a UNION/INTERSECT/EXCEPT query.
 *
 * Key characteristics:
 * - The explicit column set is taken from the FIRST branch's select list
 * - Candidate source tables for each column are collected by POSITION across all branches
 * - hasStarColumn() returns true if ANY branch namespace reports a star column
 * - addInferredColumn() records the column here and propagates it to every branch
 *   that supports dynamic inference
 *
 * Example:
 * FROM (
 *   SELECT * FROM table_1
 *   UNION ALL
 *   SELECT * FROM table_2
 *   UNION ALL
 *   SELECT * FROM table_3
 * ) Combined
 *
 * When the outer query references "col_1", it is pushed down to the star branches.
 */
public class UnionNamespace extends AbstractNamespace {

    private final TSelectSqlStatement unionQuery;
    private final String alias;

    /** All branches of the UNION (flattened, left-to-right). */
    private final List<TSelectSqlStatement> allBranches;

    /** Namespace for each branch, index-aligned with {@link #allBranches}. */
    private final List<SubqueryNamespace> branchNamespaces;

    /** Inferred columns from star push-down, keyed by the name they were first inferred under. */
    private Map<String, ColumnSource> inferredColumns;

    /** Names of the inferred columns (exposed through {@link #getInferredColumns()}). */
    private Set<String> inferredColumnNames;

    /**
     * Builds the namespace, flattening the combined query into its leaf branches and
     * creating one {@link SubqueryNamespace} (named {@code branch_<index>}) per branch.
     *
     * @param unionQuery  the combined (set-operation) SELECT statement
     * @param alias       alias of the union in the enclosing query; may be null
     * @param nameMatcher matcher used for all column-name comparisons
     */
    public UnionNamespace(TSelectSqlStatement unionQuery,
                          String alias,
                          INameMatcher nameMatcher) {
        super(unionQuery, nameMatcher);
        this.unionQuery = unionQuery;
        this.alias = alias;

        // Flatten all UNION branches
        this.allBranches = new ArrayList<>();
        flattenUnionBranches(unionQuery, allBranches);

        // Create namespace for each branch
        this.branchNamespaces = new ArrayList<>();
        for (int i = 0; i < allBranches.size(); i++) {
            TSelectSqlStatement branch = allBranches.get(i);
            branchNamespaces.add(new SubqueryNamespace(branch, "branch_" + i, nameMatcher));
        }
    }

    /**
     * Iteratively flatten UNION branches into a list.
     * Handles nested UNION structures like: (A UNION B) UNION C
     * Uses an explicit stack instead of recursion so deeply nested chains cannot
     * overflow the call stack.
     *
     * @param stmt     root statement; ignored when null
     * @param branches output list receiving the non-combined leaf statements, left to right
     */
    private void flattenUnionBranches(TSelectSqlStatement stmt, List<TSelectSqlStatement> branches) {
        if (stmt == null) {
            return;
        }

        // A LinkedList is used because it accepts null elements: getLeftStmt()/getRightStmt()
        // results are pushed unchecked and filtered out when popped.
        Deque<TSelectSqlStatement> stack = new LinkedList<>();
        stack.push(stmt);

        while (!stack.isEmpty()) {
            TSelectSqlStatement current = stack.pop();
            if (current == null) {
                continue;
            }
            if (current.isCombinedQuery()) {
                // Push right first so left is processed first (LIFO)
                stack.push(current.getRightStmt());
                stack.push(current.getLeftStmt());
            } else {
                branches.add(current);
            }
        }
    }

    /**
     * @return the alias when one was supplied, otherwise the literal {@code "<union>"}
     */
    @Override
    public String getDisplayName() {
        return alias != null ? alias : "<union>";
    }

    /**
     * For UNION, returns the first branch's final table (used for single-table resolution).
     *
     * @return the first branch namespace's final table, or null when there are no branches
     */
    @Override
    public TTable getFinalTable() {
        if (branchNamespaces.isEmpty()) {
            return null;
        }
        return branchNamespaces.get(0).getFinalTable();
    }

    /**
     * Collects the final tables of ALL branches, validating each branch namespace first.
     * The result is a plain concatenation in branch order; it is not de-duplicated.
     *
     * @return a new list with every branch's final tables
     */
    @Override
    public List<TTable> getAllFinalTables() {
        List<TTable> allTables = new ArrayList<>();
        for (SubqueryNamespace branchNs : branchNamespaces) {
            branchNs.validate();
            allTables.addAll(branchNs.getAllFinalTables());
        }
        return allTables;
    }

    /**
     * Builds {@code columnSources} from the first branch's select list.
     * Each column is named by its alias or simple column name, falling back to
     * {@code col_<position>} (1-based), and carries the candidate tables gathered
     * from the same position in every branch.
     */
    @Override
    protected void doValidate() {
        columnSources = new LinkedHashMap<>();

        if (allBranches.isEmpty()) {
            return;
        }

        // Validate all branch namespaces
        for (SubqueryNamespace branchNs : branchNamespaces) {
            branchNs.validate();
        }

        // Get columns from first branch (defines schema)
        TSelectSqlStatement firstBranch = allBranches.get(0);
        TResultColumnList selectList = firstBranch.getResultColumnList();
        if (selectList == null) {
            return;
        }

        for (int i = 0; i < selectList.size(); i++) {
            TResultColumn resultCol = selectList.getResultColumn(i);
            String colName = getColumnName(resultCol);
            if (colName == null) {
                colName = "col_" + (i + 1);
            }

            List<TTable> columnTables = collectCandidateTables(i, colName);

            // Always pass the list (even if empty) so consumers can tell that the candidate
            // tables were explicitly determined rather than left to namespace delegation.
            ColumnSource source = new ColumnSource(
                this,
                colName,
                resultCol,
                1.0,
                "union_column",
                null,          // overrideTable
                columnTables   // Pass empty list when no tables match
            );

            columnSources.put(colName, source);
        }
    }

    /**
     * Collects, across all branches, the tables that may supply the column at the given
     * select-list position. UNION columns are matched by POSITION, but a branch only
     * contributes when its column at that position is a simple column reference whose
     * name matches {@code colName}; this avoids associating a table with a column name
     * it does not actually expose at that position.
     *
     * @param position zero-based select-list position
     * @param colName  column name taken from the first branch
     * @return candidate tables in discovery order (possibly empty, never null)
     */
    private List<TTable> collectCandidateTables(int position, String colName) {
        List<TTable> columnTables = new ArrayList<>();

        for (TSelectSqlStatement branch : allBranches) {
            gudusoft.gsqlparser.nodes.TObjectName objName = simpleColumnAt(branch, position);
            if (objName == null) {
                // Missing position or non-column expression (NULL, function, ...): no tables
                continue;
            }
            if (!nameMatcher.matches(objName.getColumnNameOnly(), colName)) {
                continue;
            }

            TTable sourceTable = objName.getSourceTable();
            if (sourceTable != null) {
                // The column reference already carries its source table
                if (!columnTables.contains(sourceTable)) {
                    columnTables.add(sourceTable);
                }
                continue;
            }

            // Source table not set on the reference: fall back to the branch's table list
            if (branch.tables == null) {
                continue;
            }
            String tableQualifier = objName.getTableString();
            if (tableQualifier != null && !tableQualifier.isEmpty()) {
                addQualifiedTable(branch, tableQualifier, columnTables);
            } else {
                addFirstBaseTable(branch, columnTables);
            }
        }

        return columnTables;
    }

    /**
     * Returns the object name of the result column at {@code position} when that column
     * is a simple column reference.
     *
     * @return the referenced object name, or null when the branch has no such position,
     *         the column has no expression, or the expression is not a simple object name
     */
    private gudusoft.gsqlparser.nodes.TObjectName simpleColumnAt(TSelectSqlStatement branch, int position) {
        TResultColumnList branchSelectList = branch.getResultColumnList();
        if (branchSelectList == null || position >= branchSelectList.size()) {
            return null;
        }

        TResultColumn branchResultCol = branchSelectList.getResultColumn(position);
        if (branchResultCol == null) {
            return null;
        }

        gudusoft.gsqlparser.nodes.TExpression expr = branchResultCol.getExpr();
        if (expr == null
                || expr.getExpressionType() != gudusoft.gsqlparser.EExpressionType.simple_object_name_t) {
            return null;
        }
        return expr.getObjectOperand();
    }

    /**
     * Qualified column (e.g. {@code t.col}): adds the first table of the branch whose alias
     * or name equals the qualifier (case-insensitively), unless it is already present.
     * The search stops at the first matching table either way.
     */
    private void addQualifiedTable(TSelectSqlStatement branch, String tableQualifier, List<TTable> columnTables) {
        for (int ti = 0; ti < branch.tables.size(); ti++) {
            TTable candidate = branch.tables.getTable(ti);
            if (candidate == null) {
                continue;
            }
            String tableAlias = candidate.getAliasName();
            String tableName = candidate.getTableName() != null ? candidate.getTableName().toString() : null;
            boolean aliasMatches = tableAlias != null && tableAlias.equalsIgnoreCase(tableQualifier);
            boolean nameMatches = tableName != null && tableName.equalsIgnoreCase(tableQualifier);
            if (aliasMatches || nameMatches) {
                if (!columnTables.contains(candidate)) {
                    columnTables.add(candidate);
                }
                return;
            }
        }
    }

    /**
     * Unqualified column: adds only the first table of the branch that is not already a
     * candidate and is neither a subquery nor a join (those are not final tables).
     */
    private void addFirstBaseTable(TSelectSqlStatement branch, List<TTable> columnTables) {
        for (int ti = 0; ti < branch.tables.size(); ti++) {
            TTable candidate = branch.tables.getTable(ti);
            if (candidate == null || columnTables.contains(candidate)) {
                continue;
            }
            if (candidate.getTableType() != gudusoft.gsqlparser.ETableSource.subquery
                    && candidate.getTableType() != gudusoft.gsqlparser.ETableSource.join) {
                columnTables.add(candidate);
                return; // Only add the first table for unqualified columns
            }
        }
    }

    /**
     * Extract column name from TResultColumn.
     *
     * @param resultCol the result column; may be null
     * @return the alias if present, else the column name of a simple column reference,
     *         else null (including when {@code resultCol} is null)
     */
    private String getColumnName(TResultColumn resultCol) {
        if (resultCol == null) {
            return null;
        }

        // Check for alias
        if (resultCol.getAliasClause() != null
                && resultCol.getAliasClause().getAliasName() != null) {
            return resultCol.getAliasClause().getAliasName().toString();
        }

        // Check for simple column reference
        gudusoft.gsqlparser.nodes.TExpression expr = resultCol.getExpr();
        if (expr != null
                && expr.getExpressionType() == gudusoft.gsqlparser.EExpressionType.simple_object_name_t) {
            gudusoft.gsqlparser.nodes.TObjectName objName = expr.getObjectOperand();
            if (objName != null) {
                return objName.getColumnNameOnly();
            }
        }

        return null;
    }

    /**
     * Looks up an inferred column using the same name matcher as explicit columns,
     * so that existence checks, resolution and de-duplication agree with each other.
     *
     * @return the matching inferred column source, or null when none matches
     */
    private ColumnSource findInferredColumn(String columnName) {
        if (inferredColumns == null) {
            return null;
        }
        for (Map.Entry<String, ColumnSource> entry : inferredColumns.entrySet()) {
            if (nameMatcher.matches(entry.getKey(), columnName)) {
                return entry.getValue();
            }
        }
        // Fall back to the exact key in case the matcher does not treat it as a match
        return inferredColumns.get(columnName);
    }

    /**
     * Reports whether a column is known to this namespace.
     *
     * @return EXISTS when an explicit or inferred column matches, MAYBE when nothing
     *         matches but a branch has a star column, otherwise NOT_EXISTS
     */
    @Override
    public ColumnLevel hasColumn(String columnName) {
        ensureValidated();

        // Check in explicit columns from first branch
        for (String existingCol : columnSources.keySet()) {
            if (nameMatcher.matches(existingCol, columnName)) {
                return ColumnLevel.EXISTS;
            }
        }

        // Check in inferred columns (matcher-based, consistent with resolveColumn)
        if (findInferredColumn(columnName) != null) {
            return ColumnLevel.EXISTS;
        }

        // If any branch has SELECT *, unknown columns MAYBE exist
        if (hasStarColumn()) {
            return ColumnLevel.MAYBE;
        }

        return ColumnLevel.NOT_EXISTS;
    }

    /**
     * Resolves a column: explicit columns first, then previously inferred columns, and
     * finally, when a branch has a star column, by inferring the column on the fly
     * (confidence 0.8).
     *
     * @return the column source, or null when the column cannot be resolved
     */
    @Override
    public ColumnSource resolveColumn(String columnName) {
        ensureValidated();

        // First check explicit columns from first branch
        ColumnSource source = super.resolveColumn(columnName);
        if (source != null) {
            return source;
        }

        // Then check inferred columns
        ColumnSource inferred = findInferredColumn(columnName);
        if (inferred != null) {
            return inferred;
        }

        // If has star column, auto-infer this column
        if (hasStarColumn()) {
            boolean added = addInferredColumn(columnName, 0.8, "auto_inferred_from_outer_reference");
            if (added && inferredColumns != null) {
                ColumnSource inferredSource = inferredColumns.get(columnName);
                if (inferredSource != null) {
                    return inferredSource;
                }
            }
        }

        return null;
    }

    /**
     * @return the combined query this namespace was created for
     */
    @Override
    public TSelectSqlStatement getSelectStatement() {
        return unionQuery;
    }

    /**
     * @return true if ANY branch namespace reports a star column
     */
    @Override
    public boolean hasStarColumn() {
        for (SubqueryNamespace branchNs : branchNamespaces) {
            if (branchNs.hasStarColumn()) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return true when at least one branch has a star column
     */
    @Override
    public boolean supportsDynamicInference() {
        return hasStarColumn();
    }

    /**
     * Records an inferred column on this namespace and propagates it to every branch
     * that supports dynamic inference. Candidate tables are the de-duplicated final
     * tables of those branches.
     *
     * @param columnName name of the column to infer
     * @param confidence confidence stored on the created column source
     * @param evidence   evidence label; branches receive it with "_union_propagate" appended
     * @return true when the column was newly added; false when the name is null/empty,
     *         already present as an explicit column key, or already inferred under a
     *         matching name
     */
    @Override
    public boolean addInferredColumn(String columnName, double confidence, String evidence) {
        if (columnName == null || columnName.isEmpty()) {
            return false;
        }

        // Initialize maps if needed
        if (inferredColumns == null) {
            inferredColumns = new LinkedHashMap<>();
        }
        if (inferredColumnNames == null) {
            inferredColumnNames = new HashSet<>();
        }

        // Check if already exists in explicit columns
        if (columnSources != null && columnSources.containsKey(columnName)) {
            return false;
        }

        // Check if already inferred (matcher-based so equivalent spellings are not duplicated)
        if (findInferredColumn(columnName) != null) {
            return false;
        }

        // Collect final tables from ALL branches that support dynamic inference:
        // for data lineage the column could come from any of them.
        List<TTable> candidateTables = new ArrayList<>();
        for (SubqueryNamespace branchNs : branchNamespaces) {
            if (!branchNs.supportsDynamicInference()) {
                continue;
            }
            branchNs.validate();
            for (TTable table : branchNs.getAllFinalTables()) {
                if (table != null && !candidateTables.contains(table)) {
                    candidateTables.add(table);
                }
            }
        }

        // Create inferred column source for this union namespace WITH candidate tables
        ColumnSource source = new ColumnSource(
            this,
            columnName,
            null,
            confidence,
            evidence,
            null,  // overrideTable is null for UNION columns
            candidateTables.isEmpty() ? null : candidateTables
        );

        inferredColumns.put(columnName, source);
        inferredColumnNames.add(columnName);

        if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
            System.out.println("[UnionNamespace] Added '" + columnName + "' to " + alias +
                ", propagating to " + branchNamespaces.size() + " branches");
        }

        // CRITICAL: Propagate only to branches that support dynamic inference. Branches with
        // explicit column lists either have the column explicitly or don't have it at all.
        for (SubqueryNamespace branchNs : branchNamespaces) {
            if (!branchNs.supportsDynamicInference()) {
                if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                    System.out.println("[UnionNamespace] Skipping branch " + branchNs.getDisplayName() +
                        " (no star column)");
                }
                continue;
            }
            if (gudusoft.gsqlparser.TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                System.out.println("[UnionNamespace] Propagating '" + columnName + "' to branch " + branchNs.getDisplayName());
            }
            branchNs.addInferredColumn(columnName, confidence, evidence + "_union_propagate");
        }

        return true;
    }

    /**
     * @return an unmodifiable view of the inferred column names (empty when none)
     */
    @Override
    public Set<String> getInferredColumns() {
        if (inferredColumnNames == null) {
            return Collections.emptySet();
        }
        return Collections.unmodifiableSet(inferredColumnNames);
    }

    /**
     * Get all branch namespaces.
     * Useful for external code that needs to iterate over branches.
     *
     * @return an unmodifiable view of the branch namespaces
     */
    public List<SubqueryNamespace> getBranchNamespaces() {
        return Collections.unmodifiableList(branchNamespaces);
    }

    /**
     * Get all branch SELECT statements.
     *
     * @return an unmodifiable view of the flattened branches
     */
    public List<TSelectSqlStatement> getAllBranches() {
        return Collections.unmodifiableList(allBranches);
    }

    /**
     * Get the number of UNION branches.
     */
    public int getBranchCount() {
        return allBranches.size();
    }

    /**
     * @return a summary with the display name, branch count, total column count
     *         (explicit plus inferred) and inferred column count
     */
    @Override
    public String toString() {
        int explicitCount = columnSources != null ? columnSources.size() : 0;
        int inferredCount = inferredColumns != null ? inferredColumns.size() : 0;
        return "UnionNamespace(" + getDisplayName() +
            ", branches=" + allBranches.size() +
            ", columns=" + (explicitCount + inferredCount) +
            ", inferred=" + inferredCount + ")";
    }
}