001package gudusoft.gsqlparser.ir.semantic.diff; 002 003import gudusoft.gsqlparser.ir.semantic.ColumnRef; 004import gudusoft.gsqlparser.ir.semantic.LineageEdge; 005import gudusoft.gsqlparser.ir.semantic.LineageRef; 006import gudusoft.gsqlparser.ir.semantic.OutputColumn; 007import gudusoft.gsqlparser.ir.semantic.RelationKind; 008import gudusoft.gsqlparser.ir.semantic.RelationSource; 009import gudusoft.gsqlparser.ir.semantic.SemanticProgram; 010import gudusoft.gsqlparser.ir.semantic.StatementGraph; 011import gudusoft.gsqlparser.ir.semantic.builder.SemanticIRBuilder; 012 013import java.util.ArrayDeque; 014import java.util.ArrayList; 015import java.util.Deque; 016import java.util.HashMap; 017import java.util.HashSet; 018import java.util.LinkedHashMap; 019import java.util.LinkedHashSet; 020import java.util.List; 021import java.util.Locale; 022import java.util.Map; 023import java.util.Set; 024 025/** 026 * Project a {@link SemanticProgram} into a {@link CanonicalLineageModel}. 027 * 028 * <p>SELECT lineage: for each outer-statement output, BFS the 029 * program-level lineage edges (target → sources) until we hit 030 * {@code TABLE_COLUMN} terminals. Each terminal becomes one canonical 031 * SELECT edge. 032 * 033 * <p>Row influence: for each statement reachable from the outer (via 034 * CTE/SUBQUERY relations), every column ref in {@code filterColumnRefs} 035 * and {@code joinColumnRefs} is resolved to a base table by walking the 036 * lineage chain that starts at the relation it references. 037 */ 038public final class SemanticIRProjector { 039 040 private SemanticIRProjector() {} 041 042 public static ProjectorResult project(SemanticProgram program) { 043 if (program == null) { 044 throw new IllegalArgumentException("program must not be null"); 045 } 046 if (program.getStatements().isEmpty()) { 047 return ProjectorResult.unsupported( 048 ProjectorResult.UnsupportedReason.NO_RELATIONSHIPS, 049 "program has no statements"); 050 } 051 // Outer statement: last unnamed statement. Named statements are CTE 052 // bodies, FROM-subquery bodies, and (slice 11) scalar-subquery 053 // bodies (synthetic angle-bracketed names — see 054 // SemanticIRBuilder.SCALAR_BODY_PREFIX). The outer is the one that 055 // reads from them and is always unnamed. 056 int outerIndex = -1; 057 for (int i = program.getStatements().size() - 1; i >= 0; i--) { 058 if (program.getStatements().get(i).getName() == null) { 059 outerIndex = i; 060 break; 061 } 062 } 063 if (outerIndex < 0) { 064 return ProjectorResult.unsupported( 065 ProjectorResult.UnsupportedReason.NO_RELATIONSHIPS, 066 "no outer (unnamed) statement found"); 067 } 068 069 // Index lineage edges by their from-key so the BFS is O(N) per query. 070 Map<String, List<LineageEdge>> outgoingByFrom = indexLineage(program.getLineage()); 071 072 // Kind-aware lookups for row-influence resolution. A `CTE`-bound 073 // relation refers to a body whose name is the CTE name; a 074 // `SUBQUERY`-bound relation refers to a body whose name is the 075 // FROM-clause alias. Keeping the maps separate avoids collision when 076 // a CTE name and a subquery alias happen to match (e.g. 077 // {@code WITH x AS (...) SELECT FROM (SELECT ...) x}). 078 BodyIndexes bodies = new BodyIndexes(program); 079 080 StatementGraph outer = program.getStatements().get(outerIndex); 081 082 Set<String> outputNames = new LinkedHashSet<>(); 083 Map<String, Boolean> aggregateByOutput = new LinkedHashMap<>(); 084 Set<CanonicalLineageEdge> edges = new LinkedHashSet<>(); 085 086 // 1) SELECT edges per outer output. 087 for (OutputColumn out : outer.getOutputColumns()) { 088 String outName = out.getName().toLowerCase(Locale.ROOT); 089 outputNames.add(outName); 090 // Last write wins if the SQL has duplicate output names; the 091 // builder doesn't reject them today and the canonical model 092 // can't either, so we treat it as the OutputColumn list does. 093 aggregateByOutput.put(outName, out.isAggregate()); 094 095 String startKey = stmtOutputKey(outerIndex, out.getName()); 096 for (TableColumn tc : bfsToBaseColumns(startKey, outgoingByFrom)) { 097 edges.add(new CanonicalLineageEdge( 098 EdgeRole.SELECT, outName, tc.table, tc.column)); 099 } 100 } 101 102 // 2) Reachable statements from the outer. Single fixpoint BFS that 103 // combines (a) CTE / SUBQUERY relation edges and (b) program-level 104 // STATEMENT_OUTPUT → STATEMENT_OUTPUT lineage edges. The lineage 105 // walk picks up scalar-subquery bodies (slice 11) — they are 106 // referenced only via lineage, never via relations — and any 107 // statement those scalar bodies reach via their own relations 108 // gets visited in the same pass. 109 Set<Integer> reachable = computeReachable(program, outerIndex, outgoingByFrom); 110 111 // 3) Row-influence edges from every reachable statement's filter/join refs. 112 for (int idx : reachable) { 113 StatementGraph s = program.getStatements().get(idx); 114 for (ColumnRef ref : s.getFilterColumnRefs()) { 115 addRowInfluenceEdges(EdgeRole.FILTER, idx, s, ref, 116 outgoingByFrom, bodies, edges); 117 } 118 for (ColumnRef ref : s.getJoinColumnRefs()) { 119 addRowInfluenceEdges(EdgeRole.JOIN, idx, s, ref, 120 outgoingByFrom, bodies, edges); 121 } 122 } 123 124 // 4) Slice 24: predicate-body JOIN edges. Emit one JOIN canonical 125 // edge per base-column terminal of each predicate body's 126 // OutputColumn(s). The pass is intentionally OUTSIDE the 127 // `reachable` BFS: 128 // 129 // (a) Predicate bodies are unreachable from outer by slice-23 130 // design — outer holds no relation pointing at them and no 131 // STATEMENT_OUTPUT lineage edge into them. Adding them to 132 // `reachable` would let `addRowInfluenceEdges` walk their 133 // filter/join refs, which would manufacture FILTER / JOIN 134 // edges from inner WHERE / inner JOIN refs — dlineage's BFS 135 // does not chain into RS-2's RelationRows from RS-1 136 // (slice-24 probe finding 3), so adding those edges would 137 // break the slice-7 zero-divergence guarantee. 138 // (b) Iterating ONLY OutputColumns (not filter/join refs) means 139 // inner WHERE / inner JOIN refs do NOT contribute to outer's 140 // canonical model. This matches dlineage's behaviour: the 141 // fdr clause="on" source `clauseType="selectList" 142 // parent_id=<inner-RS>` resolves to the inner-projected 143 // column's base-column terminal via fdd chains, but the 144 // non-system source attribute prevents BFS from chaining 145 // into the inner RS's RelationRows. 146 // (c) Slice-23 constant-only bodies have OutputColumns with 147 // empty `sources` → bfsToBaseColumns finds no terminals → 148 // zero JOIN edges added (preserves slice-23 zero-divergence 149 // on corpus 21). Slice-24 column-ref bodies have one 150 // ColumnRef source whose lineage chain leads to the inner 151 // base column → exactly one JOIN edge per terminal. 152 // (d) De-duplication via the `Set<CanonicalLineageEdge>` 153 // semantics: a `(JOIN, null, departments, id)` edge from 154 // outer's `d.id = e.id` and from a column-bearing EXISTS 155 // `(SELECT d.id FROM departments d)` collapse to one. 156 for (int idx = 0; idx < program.getStatements().size(); idx++) { 157 StatementGraph s = program.getStatements().get(idx); 158 if (s.getName() == null) continue; 159 if (!SemanticIRBuilder.isPredicateSubquerySyntheticName(s.getName())) continue; 160 for (OutputColumn out : s.getOutputColumns()) { 161 String startKey = stmtOutputKey(idx, out.getName()); 162 for (TableColumn tc : bfsToBaseColumns(startKey, outgoingByFrom)) { 163 edges.add(new CanonicalLineageEdge( 164 EdgeRole.JOIN, null, tc.table, tc.column)); 165 } 166 } 167 } 168 169 return ProjectorResult.ok( 170 new CanonicalLineageModel(edges, outputNames, aggregateByOutput)); 171 } 172 173 /** 174 * Resolve a {@code ColumnRef} that appears in {@code stmt}'s filter/join 175 * clause down to base-table columns and emit one row-influence edge per 176 * terminal. 177 */ 178 private static void addRowInfluenceEdges(EdgeRole role, 179 int consumerIdx, 180 StatementGraph stmt, 181 ColumnRef ref, 182 Map<String, List<LineageEdge>> outgoingByFrom, 183 BodyIndexes bodies, 184 Set<CanonicalLineageEdge> sink) { 185 RelationSource matched = null; 186 for (RelationSource r : stmt.getRelations()) { 187 if (r.getAlias().equals(ref.getRelationAlias())) { 188 matched = r; 189 break; 190 } 191 } 192 if (matched == null) { 193 // Builder guarantees an alias is in scope, but be defensive — a 194 // dropped row-influence edge would silently mask divergence. 195 return; 196 } 197 // Slice 15: resolved-kind dispatch. OUTER_REFERENCE bindings 198 // delegate to the underlying outerKind. TABLE emits a 199 // base-column edge; CTE / SUBQUERY BFS through the outer body 200 // to reach base columns (mirroring dlineage's behaviour). 201 RelationKind kind = matched.getBinding().getKind(); 202 RelationKind resolvedKind = (kind == RelationKind.OUTER_REFERENCE) 203 ? matched.getBinding().getOuterKind() 204 : kind; 205 if (resolvedKind == RelationKind.TABLE) { 206 sink.add(new CanonicalLineageEdge(role, null, 207 matched.getBinding().getQualifiedName().toLowerCase(Locale.ROOT), 208 ref.getColumnName().toLowerCase(Locale.ROOT))); 209 return; 210 } 211 if (resolvedKind == RelationKind.CTE || resolvedKind == RelationKind.SUBQUERY) { 212 Integer downstream = bodies.lookup(consumerIdx, resolvedKind, matched); 213 if (downstream == null) return; 214 String startKey = stmtOutputKey(downstream, ref.getColumnName()); 215 for (TableColumn tc : bfsToBaseColumns(startKey, outgoingByFrom)) { 216 sink.add(new CanonicalLineageEdge(role, null, tc.table, tc.column)); 217 } 218 return; 219 } 220 // UNION / UNKNOWN / null outerKind — should not appear in 221 // row-influence today. A future slice introducing them must 222 // extend this dispatch. 223 throw new IllegalStateException( 224 "unhandled RelationKind in addRowInfluenceEdges: " + kind 225 + (kind == RelationKind.OUTER_REFERENCE 226 ? " (outerKind=" + matched.getBinding().getOuterKind() + ")" 227 : "")); 228 } 229 230 /** 231 * Single-fixpoint BFS over both (a) CTE/SUBQUERY relation edges 232 * and (b) program-level STATEMENT_OUTPUT → STATEMENT_OUTPUT 233 * lineage edges (slice 11). Each pop visits both edge kinds in 234 * the same pass, so a scalar-subquery body reached via lineage 235 * has its own CTE/SUBQUERY relations traversed before the BFS 236 * terminates. The lineage edges are pre-indexed by the caller 237 * to avoid rebuilding the index inside this method. 238 */ 239 private static Set<Integer> computeReachable(SemanticProgram program, 240 int outerIndex, 241 Map<String, List<LineageEdge>> outgoingByFrom) { 242 BodyIndexes bodies = new BodyIndexes(program); 243 Set<Integer> reachable = new LinkedHashSet<>(); 244 Deque<Integer> q = new ArrayDeque<>(); 245 q.add(outerIndex); 246 reachable.add(outerIndex); 247 while (!q.isEmpty()) { 248 int idx = q.removeFirst(); 249 StatementGraph s = program.getStatements().get(idx); 250 // (a) CTE/SUBQUERY relations. 251 for (RelationSource r : s.getRelations()) { 252 RelationKind k = r.getBinding().getKind(); 253 if (k != RelationKind.CTE && k != RelationKind.SUBQUERY) continue; 254 Integer downstream = bodies.lookup(idx, k, r); 255 if (downstream != null && reachable.add(downstream)) { 256 q.add(downstream); 257 } 258 } 259 // (b) STATEMENT_OUTPUT → STATEMENT_OUTPUT lineage edges 260 // originating from this statement's outputs (slice 11 261 // scalar-subquery body reachability). 262 for (OutputColumn out : s.getOutputColumns()) { 263 String key = stmtOutputKey(idx, out.getName()); 264 List<LineageEdge> outgoing = outgoingByFrom.get(key); 265 if (outgoing == null) continue; 266 for (LineageEdge e : outgoing) { 267 LineageRef to = e.getTo(); 268 if (to.getKind() != LineageRef.Kind.STATEMENT_OUTPUT) continue; 269 int downstream = to.getStatementIndex(); 270 if (reachable.add(downstream)) { 271 q.add(downstream); 272 } 273 } 274 } 275 } 276 return reachable; 277 } 278 279 /** Pure helper: BFS from a statement-output key over lineage edges. */ 280 private static List<TableColumn> bfsToBaseColumns(String startKey, 281 Map<String, List<LineageEdge>> outgoingByFrom) { 282 List<TableColumn> out = new ArrayList<>(); 283 Set<String> visited = new HashSet<>(); 284 Deque<String> q = new ArrayDeque<>(); 285 q.add(startKey); 286 visited.add(startKey); 287 while (!q.isEmpty()) { 288 String cur = q.removeFirst(); 289 List<LineageEdge> outgoing = outgoingByFrom.get(cur); 290 if (outgoing == null) continue; 291 for (LineageEdge e : outgoing) { 292 LineageRef to = e.getTo(); 293 if (to.getKind() == LineageRef.Kind.TABLE_COLUMN) { 294 out.add(new TableColumn( 295 to.getQualifiedName().toLowerCase(Locale.ROOT), 296 to.getColumnName().toLowerCase(Locale.ROOT))); 297 } else { 298 String nextKey = stmtOutputKey(to.getStatementIndex(), to.getOutputName()); 299 if (visited.add(nextKey)) { 300 q.add(nextKey); 301 } 302 } 303 } 304 } 305 return out; 306 } 307 308 private static Map<String, List<LineageEdge>> indexLineage(List<LineageEdge> all) { 309 Map<String, List<LineageEdge>> out = new HashMap<>(); 310 for (LineageEdge e : all) { 311 LineageRef from = e.getFrom(); 312 if (from.getKind() != LineageRef.Kind.STATEMENT_OUTPUT) continue; 313 String key = stmtOutputKey(from.getStatementIndex(), from.getOutputName()); 314 out.computeIfAbsent(key, k -> new ArrayList<>()).add(e); 315 } 316 return out; 317 } 318 319 private static String stmtOutputKey(int idx, String name) { 320 return idx + "/" + name; 321 } 322 323 private static final class TableColumn { 324 final String table; 325 final String column; 326 TableColumn(String table, String column) { 327 this.table = table; 328 this.column = column; 329 } 330 } 331 332 /** 333 * Kind-aware lookup from a CTE name or subquery alias to the index of 334 * its body statement, scoped per consumer statement. Maps are keyed by 335 * {@code consumerStmtIdx + "|" + name_lower}. 336 * 337 * <p>Slice 18 makes the lookup consumer-scoped (was: global alias-keyed) 338 * to handle the new shape "two CTE bodies each containing a FROM-subquery 339 * aliased {@code s}". With consumer-scoping each consumer sees its own 340 * body, instead of the last-write-wins value of a global map. 341 * 342 * <p>Construction (two passes): 343 * <ol> 344 * <li>Walk consumers in statement order over their direct CTE / SUBQUERY 345 * relations. {@code SUBQUERY} consumers exclusively claim a body 346 * (FROM-subquery bodies are extracted by exactly one consumer); 347 * {@code CTE} consumers do <i>not</i> claim — CTEs are reusable, so 348 * multiple consumers can record their own per-consumer entry pointing 349 * at the same CTE body. Subquery-claimed bodies are skipped by CTE 350 * pickers so a CTE name colliding with a FROM alias does not 351 * mis-bind.</li> 352 * <li>Walk STATEMENT_OUTPUT → STATEMENT_OUTPUT lineage edges to derive 353 * each child statement's immediate parent. For each child with an 354 * {@code OUTER_REFERENCE-of-CTE} or {@code OUTER_REFERENCE-of-SUBQUERY} 355 * relation, walk the immediate-parent chain (innermost → outermost) 356 * until an ancestor's Pass-1 entry is found, and copy that entry 357 * under {@code (childIdx, name)}. This is what makes the consumer- 358 * keyed lookup self-sufficient for slice-14/15 OUTER_REFERENCE row- 359 * influence — without it, a scalar body's lookup at its own index 360 * would return {@code null} because Pass 1 only stores entries for 361 * direct CTE/SUBQUERY relations (slice-15 invariant: OUTER_REFERENCE 362 * does not claim). Slice 20 generalises one-step propagation to a 363 * transitive chain walk so doubly-nested (and deeper) correlated 364 * scalars resolve their grandparent body via the same lookup.</li> 365 * </ol> 366 * 367 * <p>Collision behaviour: when both a CTE named {@code x} and a subquery 368 * aliased {@code x} exist, the SUBQUERY consumer's exclusive claim takes 369 * the FROM-subquery body (latest unclaimed prior); the CTE consumer's 370 * non-claiming pick takes the CTE body (earliest non-subquery-claimed 371 * prior). Pinned by {@code subqueryAliasResolvesPastUnusedCteWithSameName} 372 * and {@code outerReferenceRelationDoesNotClaimBodies} in 373 * {@link gudusoft.gsqlparser.ir.semantic.slice7.SemanticIRProjectorBodyIndexesTest}. 374 * 375 * <p>Unused CTEs (declared but never referenced) have no entry in any 376 * map. Reachability BFS does not visit them, so this is correct — there 377 * is no caller that could ask for them. Slice 18 removed the legacy 378 * default-population pass for the same reason. 379 */ 380 private static final class BodyIndexes { 381 // Per-consumer claim. Key: "<consumerStmtIdx>|<name_lower>". 382 private final Map<String, Integer> cteByConsumerAndName = new HashMap<>(); 383 private final Map<String, Integer> subqueryByConsumerAndAlias = new HashMap<>(); 384 385 BodyIndexes(SemanticProgram program) { 386 // Build the candidate-bodies-by-name index. Synthetic-named 387 // bodies (slice-11 scalar bodies, slice-12 set-op branches) are 388 // skipped — they are not CTE/FROM-subquery candidates and are 389 // reached only via lineage edges in computeReachable. 390 Map<String, java.util.List<Integer>> bodyIndicesByName = new HashMap<>(); 391 for (int i = 0; i < program.getStatements().size(); i++) { 392 StatementGraph s = program.getStatements().get(i); 393 if (s.getName() == null) continue; 394 if (SemanticIRBuilder.isScalarSyntheticName(s.getName())) continue; 395 if (SemanticIRBuilder.isSetOpBranchSyntheticName(s.getName())) continue; 396 if (SemanticIRBuilder.isPredicateSubquerySyntheticName(s.getName())) continue; 397 bodyIndicesByName 398 .computeIfAbsent(s.getName().toLowerCase(Locale.ROOT), 399 k -> new java.util.ArrayList<>()) 400 .add(i); 401 } 402 403 // Pass 1: walk consumer relations in statement order. Body 404 // claiming is kind-aware: 405 // - CTE consumer: pick the EARLIEST candidate matching the 406 // name with index < ci that is not already subquery-claimed. 407 // Do NOT claim — CTE bodies are reusable across consumers. 408 // - SUBQUERY consumer: pick the LATEST unclaimed candidate 409 // matching the alias with index < ci. Exclusive claim. 410 // 411 // Slice-15 invariant: OUTER_REFERENCE relations do NOT claim 412 // bodies in this pass — the filter at line ~XXX excludes them. 413 // Pass 2 below derives their per-consumer entries from the 414 // parent's entries via lineage. 415 Set<Integer> subqueryClaimed = new HashSet<>(); 416 for (int ci = 0; ci < program.getStatements().size(); ci++) { 417 StatementGraph cs = program.getStatements().get(ci); 418 for (RelationSource r : cs.getRelations()) { 419 RelationKind k = r.getBinding().getKind(); 420 if (k != RelationKind.CTE && k != RelationKind.SUBQUERY) continue; 421 String name = (k == RelationKind.SUBQUERY) 422 ? r.getAlias().toLowerCase(Locale.ROOT) 423 : r.getBinding().getQualifiedName().toLowerCase(Locale.ROOT); 424 java.util.List<Integer> candidates = bodyIndicesByName.get(name); 425 if (candidates == null) continue; 426 Integer chosen = null; 427 if (k == RelationKind.CTE) { 428 for (Integer idx : candidates) { 429 if (idx >= ci) break; 430 if (subqueryClaimed.contains(idx)) continue; 431 chosen = idx; 432 break; 433 } 434 } else { // SUBQUERY 435 for (int j = candidates.size() - 1; j >= 0; j--) { 436 int idx = candidates.get(j); 437 if (idx >= ci) continue; 438 if (subqueryClaimed.contains(idx)) continue; 439 chosen = idx; 440 break; 441 } 442 if (chosen != null) subqueryClaimed.add(chosen); 443 } 444 if (chosen == null) continue; 445 String key = ci + "|" + name; 446 if (k == RelationKind.CTE) cteByConsumerAndName.put(key, chosen); 447 else subqueryByConsumerAndAlias.put(key, chosen); 448 } 449 } 450 451 // Pass 2: derive each child statement's immediate parent from 452 // STATEMENT_OUTPUT → STATEMENT_OUTPUT lineage. For each child 453 // with an OUTER_REFERENCE-of-CTE or OUTER_REFERENCE-of-SUBQUERY 454 // relation, walk the IMMEDIATE-PARENT CHAIN (innermost → 455 // outermost) until an ancestor's per-consumer Pass-1 entry is 456 // found, and copy that entry under the child's index. This 457 // makes the consumer-keyed lookup self-sufficient for 458 // OUTER_REFERENCE row-influence at any nesting depth (slice 18 459 // codex round-2 MUST 2; slice 20 generalises one-step → chain). 460 // 461 // Slice-14/15 carry only the immediate-parent scope and one 462 // STATEMENT_OUTPUT edge per child output is typical, so 463 // first-write-wins is correct. Slice-20 chain walks support 464 // doubly-nested (and deeper) correlated scalars where the 465 // inner-inner's OUTER_REFERENCE is anchored at the 466 // grandparent (or further-ancestor) statement. 467 // 468 // Cycle guard via `visited`: STATEMENT_OUTPUT → STATEMENT_OUTPUT 469 // is a DAG by construction (a child's output never flows back 470 // to an ancestor's output), but the guard makes the 471 // non-termination invariant explicit. 472 Map<Integer, Integer> immediateParent = new HashMap<>(); 473 for (LineageEdge e : program.getLineage()) { 474 if (e.getFrom().getKind() != LineageRef.Kind.STATEMENT_OUTPUT) continue; 475 if (e.getTo().getKind() != LineageRef.Kind.STATEMENT_OUTPUT) continue; 476 // Slice 24: predicate-body edges do NOT anchor immediate- 477 // parent chains. The predicate body is unreachable from 478 // outer (slice-23 invariant); recording it as the 479 // "immediate parent" of a CTE / SUBQUERY body would 480 // mis-route OUTER_REFERENCE chain walks below — a slice-15 481 // / slice-20 child looking for the CTE owner would land 482 // at the predicate body's index, find no Pass-1 entry, 483 // walk to the predicate's parent (none recorded), and 484 // give up. Slice-23 constant predicate bodies emitted no 485 // STATEMENT_OUTPUT → STATEMENT_OUTPUT edges (their 486 // synthetic OutputColumn had empty sources), so the side 487 // effect surfaces only when slice-24 column-bearing 488 // predicate bodies have CTE-bound inner relations. 489 int fromIdx = e.getFrom().getStatementIndex(); 490 StatementGraph fromStmt = program.getStatements().get(fromIdx); 491 if (fromStmt.getName() != null 492 && SemanticIRBuilder.isPredicateSubquerySyntheticName(fromStmt.getName())) { 493 continue; 494 } 495 immediateParent.putIfAbsent( 496 e.getTo().getStatementIndex(), 497 e.getFrom().getStatementIndex()); 498 } 499 for (Map.Entry<Integer, Integer> entry : immediateParent.entrySet()) { 500 int childIdx = entry.getKey(); 501 StatementGraph cs = program.getStatements().get(childIdx); 502 for (RelationSource r : cs.getRelations()) { 503 if (r.getBinding().getKind() != RelationKind.OUTER_REFERENCE) continue; 504 RelationKind ok = r.getBinding().getOuterKind(); 505 if (ok != RelationKind.CTE && ok != RelationKind.SUBQUERY) continue; 506 String name = (ok == RelationKind.SUBQUERY) 507 ? r.getAlias().toLowerCase(Locale.ROOT) 508 : r.getBinding().getQualifiedName().toLowerCase(Locale.ROOT); 509 Map<String, Integer> sourceMap = (ok == RelationKind.CTE) 510 ? cteByConsumerAndName : subqueryByConsumerAndAlias; 511 Integer body = null; 512 Integer cur = entry.getValue(); 513 Set<Integer> visited = new HashSet<>(); 514 visited.add(childIdx); // never claim self 515 while (cur != null && visited.add(cur)) { 516 body = sourceMap.get(cur + "|" + name); 517 if (body != null) break; 518 cur = immediateParent.get(cur); 519 } 520 if (body == null) continue; 521 String childKey = childIdx + "|" + name; 522 if (ok == RelationKind.CTE) 523 cteByConsumerAndName.putIfAbsent(childKey, body); 524 else 525 subqueryByConsumerAndAlias.putIfAbsent(childKey, body); 526 } 527 } 528 } 529 530 Integer lookup(int consumerIdx, RelationKind kind, RelationSource consumer) { 531 String name = (kind == RelationKind.CTE) 532 ? consumer.getBinding().getQualifiedName().toLowerCase(Locale.ROOT) 533 : consumer.getAlias().toLowerCase(Locale.ROOT); 534 String key = consumerIdx + "|" + name; 535 if (kind == RelationKind.CTE) return cteByConsumerAndName.get(key); 536 if (kind == RelationKind.SUBQUERY) return subqueryByConsumerAndAlias.get(key); 537 return null; 538 } 539 } 540}