package gudusoft.gsqlparser.resolver2.namespace;

import gudusoft.gsqlparser.EExpressionType;
import gudusoft.gsqlparser.ETableSource;
import gudusoft.gsqlparser.TBaseType;
import gudusoft.gsqlparser.nodes.TExpression;
import gudusoft.gsqlparser.nodes.TObjectName;
import gudusoft.gsqlparser.nodes.TResultColumn;
import gudusoft.gsqlparser.nodes.TResultColumnList;
import gudusoft.gsqlparser.nodes.TTable;
import gudusoft.gsqlparser.resolver2.ColumnLevel;
import gudusoft.gsqlparser.resolver2.matcher.INameMatcher;
import gudusoft.gsqlparser.resolver2.model.ColumnSource;
import gudusoft.gsqlparser.stmt.TSelectSqlStatement;

import java.util.*;

/**
 * Namespace representing a UNION/INTERSECT/EXCEPT query.
 *
 * Key characteristics:
 * - Schema (column names and order) is defined by the FIRST branch (SQL standard)
 * - Columns of the other branches are associated with the schema by POSITION
 * - hasStarColumn() returns true if ANY branch has SELECT *
 * - addInferredColumn() propagates the column to every branch that supports
 *   dynamic inference (i.e. has SELECT *); branches with an explicit select
 *   list are skipped
 *
 * Example:
 * FROM (
 *   SELECT * FROM table_1
 *   UNION ALL
 *   SELECT * FROM table_2
 *   UNION ALL
 *   SELECT * FROM table_3
 * ) Combined
 *
 * When the outer query references "col_1", the column is pushed down to all
 * three branches because each of them has a star column.
 */
public class UnionNamespace extends AbstractNamespace {

    private final TSelectSqlStatement unionQuery;
    private final String alias;

    /** All leaf branches of the UNION (flattened, left to right) */
    private final List<TSelectSqlStatement> allBranches;

    /** Namespace for each branch, index-aligned with {@link #allBranches} */
    private final List<SubqueryNamespace> branchNamespaces;

    /** Inferred columns from star push-down, keyed by the name they were added under */
    private Map<String, ColumnSource> inferredColumns;

    /** Track inferred column names */
    private Set<String> inferredColumnNames;

    /**
     * Build a namespace for a combined (set-operator) query.
     *
     * @param unionQuery  the combined SELECT statement
     * @param alias       alias of the derived table, may be null
     * @param nameMatcher matcher used for all column-name comparisons
     */
    public UnionNamespace(TSelectSqlStatement unionQuery,
                          String alias,
                          INameMatcher nameMatcher) {
        super(unionQuery, nameMatcher);
        this.unionQuery = unionQuery;
        this.alias = alias;

        // Flatten all UNION branches
        this.allBranches = new ArrayList<>();
        flattenUnionBranches(unionQuery, allBranches);

        // Create namespace for each branch
        this.branchNamespaces = new ArrayList<>();
        for (int i = 0; i < allBranches.size(); i++) {
            TSelectSqlStatement branch = allBranches.get(i);
            SubqueryNamespace branchNs = new SubqueryNamespace(branch, "branch_" + i, nameMatcher);
            branchNamespaces.add(branchNs);
        }
    }

    /**
     * Recursively flatten UNION branches into a list.
     * Handles nested UNION structures like: (A UNION B) UNION C
     *
     * @param stmt     statement to flatten; null is ignored
     * @param branches output list receiving the leaf statements, left side first
     */
    private void flattenUnionBranches(TSelectSqlStatement stmt, List<TSelectSqlStatement> branches) {
        if (stmt == null) {
            return;
        }

        if (stmt.isCombinedQuery()) {
            // Recursively flatten left side, then right side
            flattenUnionBranches(stmt.getLeftStmt(), branches);
            flattenUnionBranches(stmt.getRightStmt(), branches);
        } else {
            // This is a leaf branch - add it
            branches.add(stmt);
        }
    }

    /**
     * @return the alias, or {@code "<union>"} when no alias was given
     */
    @Override
    public String getDisplayName() {
        return alias != null ? alias : "<union>";
    }

    /**
     * @return the first branch's final table (used for single-table resolution),
     *         or null when there are no branches
     */
    @Override
    public TTable getFinalTable() {
        if (!branchNamespaces.isEmpty()) {
            return branchNamespaces.get(0).getFinalTable();
        }
        return null;
    }

    /**
     * @return the final tables of ALL branches, concatenated in branch order
     *         (not de-duplicated)
     */
    @Override
    public List<TTable> getAllFinalTables() {
        List<TTable> allTables = new ArrayList<>();
        for (SubqueryNamespace branchNs : branchNamespaces) {
            branchNs.validate();
            allTables.addAll(branchNs.getAllFinalTables());
        }
        return allTables;
    }

    /**
     * Build {@code columnSources} from the first branch's SELECT list and, for
     * each column position, attach the candidate tables found in every branch.
     */
    @Override
    protected void doValidate() {
        columnSources = new LinkedHashMap<>();

        if (allBranches.isEmpty()) {
            return;
        }

        // Validate all branch namespaces
        for (SubqueryNamespace branchNs : branchNamespaces) {
            branchNs.validate();
        }

        // Get columns from first branch (defines schema)
        TResultColumnList selectList = allBranches.get(0).getResultColumnList();
        if (selectList == null) {
            return;
        }

        for (int i = 0; i < selectList.size(); i++) {
            TResultColumn resultCol = selectList.getResultColumn(i);
            String colName = getColumnName(resultCol);
            if (colName == null) {
                // No alias and not a simple column reference: synthesize a positional name
                colName = "col_" + (i + 1);
            }

            // Always pass the list (even if empty) so getAllFinalTables() knows we explicitly
            // determined the candidate tables rather than needing to delegate to namespace
            ColumnSource source = new ColumnSource(
                    this,
                    colName,
                    resultCol,
                    1.0,
                    "union_column",
                    null,                               // overrideTable
                    collectColumnTables(i, colName)     // may be empty when no tables match
            );

            columnSources.put(colName, source);
        }
    }

    /**
     * Collect, across ALL branches, the tables that may supply the column at the
     * given position.
     *
     * UNION/MINUS columns are matched by POSITION. A branch only contributes when
     * its result column at that position is a simple column reference whose name
     * matches {@code colName}; this prevents associating a table with a column name
     * it does not actually expose at that position. Non-column expressions (NULL,
     * functions, etc.) contribute nothing.
     *
     * @param position zero-based index into each branch's select list
     * @param colName  column name defined by the first branch
     * @return candidate tables without duplicates, possibly empty, never null
     */
    private List<TTable> collectColumnTables(int position, String colName) {
        List<TTable> columnTables = new ArrayList<>();

        for (TSelectSqlStatement branch : allBranches) {
            TObjectName objName = simpleColumnReferenceAt(branch, position);
            if (objName == null || !nameMatcher.matches(objName.getColumnNameOnly(), colName)) {
                continue;
            }

            // First try the source table recorded on the column reference itself
            TTable sourceTable = objName.getSourceTable();
            if (sourceTable != null) {
                addIfAbsent(columnTables, sourceTable);
                continue;
            }

            // Source table not set (Phase 1 resolution hasn't happened):
            // fall back to the branch's own table list
            if (branch.tables == null) {
                continue;
            }

            String tableQualifier = objName.getTableString();
            TTable candidate = (tableQualifier != null && !tableQualifier.isEmpty())
                    ? findTableByQualifier(branch, tableQualifier)   // qualified, e.g. t.col
                    : findFirstBaseTable(branch, columnTables);      // unqualified
            if (candidate != null) {
                addIfAbsent(columnTables, candidate);
            }
        }

        return columnTables;
    }

    /**
     * Return the object name of the result column at {@code position} in
     * {@code branch} when it is a simple column reference, otherwise null.
     */
    private TObjectName simpleColumnReferenceAt(TSelectSqlStatement branch, int position) {
        TResultColumnList selectList = branch.getResultColumnList();
        if (selectList == null || position >= selectList.size()) {
            return null;
        }
        return simpleColumnReference(selectList.getResultColumn(position));
    }

    /**
     * Return the object name behind a result column whose expression is a simple
     * column reference; null for a null column, a missing expression, or any
     * other expression type.
     */
    private TObjectName simpleColumnReference(TResultColumn resultCol) {
        if (resultCol == null) {
            return null;
        }
        TExpression expr = resultCol.getExpr();
        if (expr == null || expr.getExpressionType() != EExpressionType.simple_object_name_t) {
            return null;
        }
        return expr.getObjectOperand();
    }

    /**
     * Find the first table of {@code branch} whose alias or name equals
     * {@code qualifier} (case-insensitive). Caller guarantees
     * {@code branch.tables} is non-null.
     */
    private TTable findTableByQualifier(TSelectSqlStatement branch, String qualifier) {
        for (int ti = 0; ti < branch.tables.size(); ti++) {
            TTable table = branch.tables.getTable(ti);
            if (table == null) {
                continue;
            }
            String tableAlias = table.getAliasName();
            String tableName = table.getTableName() != null ? table.getTableName().toString() : null;
            if ((tableAlias != null && tableAlias.equalsIgnoreCase(qualifier))
                    || (tableName != null && tableName.equalsIgnoreCase(qualifier))) {
                return table;
            }
        }
        return null;
    }

    /**
     * Find the first table of {@code branch} that is not already collected and is
     * neither a subquery nor a join (those are not final tables). Caller
     * guarantees {@code branch.tables} is non-null.
     */
    private TTable findFirstBaseTable(TSelectSqlStatement branch, List<TTable> alreadyCollected) {
        for (int ti = 0; ti < branch.tables.size(); ti++) {
            TTable table = branch.tables.getTable(ti);
            if (table == null || alreadyCollected.contains(table)) {
                continue;
            }
            if (table.getTableType() != ETableSource.subquery
                    && table.getTableType() != ETableSource.join) {
                return table;
            }
        }
        return null;
    }

    /** Append {@code table} unless the list already contains it. */
    private static void addIfAbsent(List<TTable> tables, TTable table) {
        if (!tables.contains(table)) {
            tables.add(table);
        }
    }

    /**
     * Extract column name from TResultColumn.
     *
     * @param resultCol result column, may be null
     * @return the alias if present, else the column name of a simple column
     *         reference, else null
     */
    private String getColumnName(TResultColumn resultCol) {
        if (resultCol == null) {
            return null;
        }

        // Check for alias
        if (resultCol.getAliasClause() != null &&
            resultCol.getAliasClause().getAliasName() != null) {
            return resultCol.getAliasClause().getAliasName().toString();
        }

        // Check for simple column reference
        TObjectName objName = simpleColumnReference(resultCol);
        return objName != null ? objName.getColumnNameOnly() : null;
    }

    /**
     * Look up an inferred column: exact key first, then via the name matcher so
     * that lookups agree with how explicit columns are compared.
     *
     * @return the inferred source, or null when none matches
     */
    private ColumnSource findInferredColumn(String columnName) {
        if (inferredColumns == null) {
            return null;
        }
        ColumnSource exact = inferredColumns.get(columnName);
        if (exact != null) {
            return exact;
        }
        for (Map.Entry<String, ColumnSource> entry : inferredColumns.entrySet()) {
            if (nameMatcher.matches(entry.getKey(), columnName)) {
                return entry.getValue();
            }
        }
        return null;
    }

    /**
     * Whether an explicit column (if any have been built yet) matches
     * {@code columnName}, by exact key or via the name matcher.
     */
    private boolean matchesExplicitColumn(String columnName) {
        if (columnSources == null) {
            return false;
        }
        if (columnSources.containsKey(columnName)) {
            return true;
        }
        for (String existingCol : columnSources.keySet()) {
            if (nameMatcher.matches(existingCol, columnName)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return EXISTS for an explicit or already-inferred column, MAYBE when any
     *         branch has SELECT *, otherwise NOT_EXISTS
     */
    @Override
    public ColumnLevel hasColumn(String columnName) {
        ensureValidated();

        // Check in explicit columns from first branch
        for (String existingCol : columnSources.keySet()) {
            if (nameMatcher.matches(existingCol, columnName)) {
                return ColumnLevel.EXISTS;
            }
        }

        // Check in inferred columns (same matching rule as resolveColumn)
        if (findInferredColumn(columnName) != null) {
            return ColumnLevel.EXISTS;
        }

        // If any branch has SELECT *, unknown columns MAYBE exist
        if (hasStarColumn()) {
            return ColumnLevel.MAYBE;
        }

        return ColumnLevel.NOT_EXISTS;
    }

    /**
     * Resolve a column: explicit columns first, then inferred columns; when any
     * branch has SELECT *, an unknown column is auto-inferred and returned.
     *
     * @return the column source, or null when the column cannot be resolved
     */
    @Override
    public ColumnSource resolveColumn(String columnName) {
        ensureValidated();

        // First check explicit columns from first branch
        ColumnSource source = super.resolveColumn(columnName);
        if (source != null) {
            return source;
        }

        // Then check inferred columns
        ColumnSource inferred = findInferredColumn(columnName);
        if (inferred != null) {
            return inferred;
        }

        // If has star column, auto-infer this column
        if (hasStarColumn()) {
            boolean added = addInferredColumn(columnName, 0.8, "auto_inferred_from_outer_reference");
            if (added && inferredColumns != null) {
                ColumnSource inferredSource = inferredColumns.get(columnName);
                if (inferredSource != null) {
                    return inferredSource;
                }
            }
        }

        return null;
    }

    /** @return the combined query this namespace was built from */
    @Override
    public TSelectSqlStatement getSelectStatement() {
        return unionQuery;
    }

    /** @return true if ANY branch has SELECT * */
    @Override
    public boolean hasStarColumn() {
        for (SubqueryNamespace branchNs : branchNamespaces) {
            if (branchNs.hasStarColumn()) {
                return true;
            }
        }
        return false;
    }

    /** @return true when at least one branch has a star column */
    @Override
    public boolean supportsDynamicInference() {
        return hasStarColumn();
    }

    /**
     * Register an inferred column on this namespace and propagate it to every
     * branch that supports dynamic inference.
     *
     * @param columnName column to add; null or empty is rejected
     * @param confidence confidence recorded on the created column source
     * @param evidence   evidence tag; branches receive it with the suffix
     *                   {@code "_union_propagate"}
     * @return true if the column was newly added; false if the name is
     *         null/empty or already matches an explicit or inferred column
     */
    @Override
    public boolean addInferredColumn(String columnName, double confidence, String evidence) {
        if (columnName == null || columnName.isEmpty()) {
            return false;
        }

        // Initialize maps if needed
        if (inferredColumns == null) {
            inferredColumns = new LinkedHashMap<>();
        }
        if (inferredColumnNames == null) {
            inferredColumnNames = new HashSet<>();
        }

        // Reject names that already exist, compared with the name matcher so a
        // differently-cased spelling cannot shadow an explicit column or create
        // a duplicate inferred entry
        if (matchesExplicitColumn(columnName) || findInferredColumn(columnName) != null) {
            return false;
        }

        // Collect final tables from ALL branches that support dynamic inference (have SELECT *)
        // For data lineage, we need to track that columns could come from any UNION branch
        List<TTable> candidateTables = new ArrayList<>();
        for (SubqueryNamespace branchNs : branchNamespaces) {
            // Only collect from branches that could have inferred columns
            if (!branchNs.supportsDynamicInference()) {
                continue;
            }
            branchNs.validate();
            for (TTable table : branchNs.getAllFinalTables()) {
                if (table != null) {
                    addIfAbsent(candidateTables, table);
                }
            }
        }

        // Create inferred column source for this union namespace WITH candidate tables
        ColumnSource source = new ColumnSource(
                this,
                columnName,
                null,
                confidence,
                evidence,
                null,  // overrideTable is null for UNION columns
                candidateTables.isEmpty() ? null : candidateTables
        );

        inferredColumns.put(columnName, source);
        inferredColumnNames.add(columnName);

        if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
            System.out.println("[UnionNamespace] Added '" + columnName + "' to " + alias +
                ", propagating to " + branchNamespaces.size() + " branches");
        }

        // CRITICAL: Propagate to branches that support dynamic inference (have SELECT *)
        // Only propagate to branches with star columns - branches with explicit column lists
        // either have the column explicitly or don't have it at all.
        for (SubqueryNamespace branchNs : branchNamespaces) {
            if (!branchNs.supportsDynamicInference()) {
                if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                    System.out.println("[UnionNamespace] Skipping branch " + branchNs.getDisplayName() +
                        " (no star column)");
                }
                continue;
            }
            if (TBaseType.DUMP_RESOLVER_LOG_TO_CONSOLE) {
                System.out.println("[UnionNamespace] Propagating '" + columnName + "' to branch " + branchNs.getDisplayName());
            }
            branchNs.addInferredColumn(columnName, confidence, evidence + "_union_propagate");
        }

        return true;
    }

    /** @return an unmodifiable view of the inferred column names, never null */
    @Override
    public Set<String> getInferredColumns() {
        if (inferredColumnNames == null) {
            return Collections.emptySet();
        }
        return Collections.unmodifiableSet(inferredColumnNames);
    }

    /**
     * Get all branch namespaces.
     * Useful for external code that needs to iterate over branches.
     */
    public List<SubqueryNamespace> getBranchNamespaces() {
        return Collections.unmodifiableList(branchNamespaces);
    }

    /**
     * Get all branch SELECT statements.
     */
    public List<TSelectSqlStatement> getAllBranches() {
        return Collections.unmodifiableList(allBranches);
    }

    /**
     * Get the number of UNION branches.
     */
    public int getBranchCount() {
        return allBranches.size();
    }

    @Override
    public String toString() {
        int inferredCount = inferredColumns != null ? inferredColumns.size() : 0;
        int totalColumns = (columnSources != null ? columnSources.size() : 0) + inferredCount;
        return "UnionNamespace(" + getDisplayName() +
               ", branches=" + allBranches.size() +
               ", columns=" + totalColumns +
               ", inferred=" + inferredCount + ")";
    }
}