package gudusoft.gsqlparser.pp2.layout.rules;

import gudusoft.gsqlparser.ETokenType;
import gudusoft.gsqlparser.pp2.layout.LayoutContext;
import gudusoft.gsqlparser.pp2.layout.LayoutPriorities;
import gudusoft.gsqlparser.pp2.layout.LayoutRule;
import gudusoft.gsqlparser.pp2.token.Pp2Token;
import gudusoft.gsqlparser.pp2.token.Pp2TokenStream;
import gudusoft.gsqlparser.pp2.token.TokenRole;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

/**
 * Lays out set operators vertically: a linebreak before the operator keyword
 * ({@code UNION}/{@code INTERSECT}/{@code EXCEPT}/{@code MINUS}) and before the
 * {@code SELECT} that follows the operator. So
 * {@code SELECT 1 UNION ALL SELECT 2} becomes:
 *
 * <pre>
 * SELECT 1
 * UNION ALL
 * SELECT 2
 * </pre>
 *
 * <p>The break goes before the operator's first word only, so a multi-word
 * operator like {@code UNION ALL} stays on one line ({@code ALL} carries no
 * break). Priority {@link LayoutPriorities#CLAUSE_LINEBREAK}. Iterative;
 * read-only over tokens.
 *
 * <p>Plan reference: §7.3/S25, §7.4/S25.
 */
public final class SetOperatorRules implements LayoutRule {

    /** Upper-cased first words of the set operators this rule recognises. */
    private static final Set<String> SET_OPS;
    static {
        Set<String> s = new HashSet<String>(Arrays.asList(
                "UNION", "INTERSECT", "EXCEPT", "MINUS"));
        SET_OPS = Collections.unmodifiableSet(s);
    }

    @Override
    public int priority() { return LayoutPriorities.CLAUSE_LINEBREAK; }

    @Override
    public String name() { return "SetOperatorRules"; }

    /**
     * Scans the token stream once and requests a linebreak (with zero blanks)
     * before every set operator and before every bare {@code SELECT} that
     * starts the arm following a set operator.
     *
     * @param context layout context supplying the token stream and receiving
     *                the linebreak/blank requests
     */
    @Override
    public void apply(LayoutContext context) {
        Pp2TokenStream stream = context.getStream();
        int n = stream.size();
        // Index 0 is skipped: the first token of the stream never gets a break.
        for (int i = 1; i < n; i++) {
            if (isSetOp(stream, i)) {
                // Break before the operator (first word of UNION [ALL], etc.).
                requestOwnLine(context, i);
            } else if (beginsBareArm(stream, i)) {
                // Break before the SELECT that begins the next set-operation arm.
                requestOwnLine(context, i);
            }
        }
    }

    /** Requests one linebreak and zero blanks before the token at {@code i}. */
    private static void requestOwnLine(LayoutContext context, int i) {
        context.requestLinebreaksBefore(i, 1);
        context.requestBlanksBefore(i, 0);
    }

    /**
     * Whether the token at {@code i} is a master-keyword {@code SELECT} that
     * directly follows a set operator (see {@link #followsSetOp}).
     */
    private static boolean beginsBareArm(Pp2TokenStream stream, int i) {
        Pp2Token t = stream.get(i);
        return t.hasRole(TokenRole.KEYWORD_MASTER)
                && isSelect(t)
                && followsSetOp(stream, i);
    }

    /**
     * A SELECT preceded — skipping comments and {@code ALL}/{@code DISTINCT} —
     * by a set-operator keyword (a bare arm like {@code UNION ALL SELECT ...}).
     *
     * <p>Parenthesised arms ({@code UNION ALL (SELECT ...)}) are intentionally
     * <i>not</i> broken here: the inner SELECT is a subquery whose layout (and
     * the spacing of the wrapping paren) is owned by the JOIN/paren rules (S26)
     * and the subquery/indent rules (S28). Forcing a break before the inner
     * SELECT here would produce {@code all(\nselect} before those rules run.
     *
     * @param stream token stream being laid out
     * @param i      index of the candidate {@code SELECT} token
     * @return {@code true} if the nearest preceding keyword other than
     *         {@code ALL}/{@code DISTINCT} is a set operator and only comments
     *         lie in between
     */
    private static boolean followsSetOp(Pp2TokenStream stream, int i) {
        for (int j = i - 1; j >= 0; j--) {
            Pp2Token p = stream.get(j);
            ETokenType type = p.getSourceToken().tokentype;
            if (isComment(type)) {
                continue;
            }
            if (type != ETokenType.ttkeyword) {
                // Any other token (e.g. an opening paren) ends the search.
                return false;
            }
            String u = upper(p);
            if ("ALL".equals(u) || "DISTINCT".equals(u)) {
                continue; // operator modifier; keep looking for the operator
            }
            return SET_OPS.contains(u);
        }
        return false;
    }

    /**
     * Whether the keyword at {@code i} is a real set operator (not, e.g.,
     * BigQuery's {@code SELECT * EXCEPT(col)} column-exclusion). A real set
     * operator is either not immediately followed by {@code (}, or the run of
     * one or more {@code (} that follows it begins a parenthesised query
     * ({@code (SELECT ...)} / {@code (WITH ...)}, including nested forms such
     * as {@code ((SELECT ...))}).
     *
     * @param stream token stream being laid out
     * @param i      index of the candidate operator token
     * @return {@code true} if the token should be treated as a set operator
     */
    private static boolean isSetOp(Pp2TokenStream stream, int i) {
        Pp2Token t = stream.get(i);
        if (t.getSourceToken().tokentype != ETokenType.ttkeyword) {
            return false;
        }
        if (!SET_OPS.contains(upper(t))) {
            return false;
        }
        int next = nextSolid(stream, i);
        if (next < 0) {
            return true; // trailing operator; harmless to break before
        }
        if (!isLeftParen(stream.get(next))) {
            return true; // followed by SELECT / ALL / DISTINCT / etc.
        }
        // Skip every consecutive opening paren so that a nested arm such as
        // UNION ((SELECT ...)) is still recognised as a parenthesised query.
        int after = next;
        do {
            after = nextSolid(stream, after);
        } while (after >= 0 && isLeftParen(stream.get(after)));
        if (after < 0) {
            return false;
        }
        String u = upper(stream.get(after));
        return "SELECT".equals(u) || "WITH".equals(u);
    }

    /** Whether {@code t} is an opening-parenthesis token. */
    private static boolean isLeftParen(Pp2Token t) {
        return t.getSourceToken().tokentype == ETokenType.ttleftparenthesis;
    }

    /**
     * Index of the first non-comment token after {@code from}, or {@code -1}
     * if only comments (or nothing) remain in the stream.
     */
    private static int nextSolid(Pp2TokenStream stream, int from) {
        for (int j = from + 1; j < stream.size(); j++) {
            if (!isComment(stream.get(j).getSourceToken().tokentype)) {
                return j;
            }
        }
        return -1;
    }

    /** Whether {@code t} is a keyword token whose text is {@code SELECT} (case-insensitive). */
    private static boolean isSelect(Pp2Token t) {
        return t.getSourceToken().tokentype == ETokenType.ttkeyword
                && "SELECT".equals(upper(t));
    }

    /** Token text upper-cased with {@link Locale#ROOT}; empty string when the text is {@code null}. */
    private static String upper(Pp2Token t) {
        String s = t.getText();
        return s == null ? "" : s.toUpperCase(Locale.ROOT);
    }

    /** Whether {@code type} is one of the three comment token types. */
    private static boolean isComment(ETokenType type) {
        return type == ETokenType.ttsimplecomment
                || type == ETokenType.ttbracketedcomment
                || type == ETokenType.ttCPPComment;
    }
}