001package gudusoft.gsqlparser.pp2.layout.rules;
002
003import gudusoft.gsqlparser.ETokenType;
004import gudusoft.gsqlparser.pp2.layout.LayoutContext;
005import gudusoft.gsqlparser.pp2.layout.LayoutPriorities;
006import gudusoft.gsqlparser.pp2.layout.LayoutRule;
007import gudusoft.gsqlparser.pp2.token.Pp2Token;
008import gudusoft.gsqlparser.pp2.token.Pp2TokenStream;
009import gudusoft.gsqlparser.pp2.token.TokenRole;
010
011import java.util.Arrays;
012import java.util.Collections;
013import java.util.HashSet;
014import java.util.Locale;
015import java.util.Set;
016
017/**
018 * Lays out set operators vertically: a linebreak before the operator keyword
019 * ({@code UNION}/{@code INTERSECT}/{@code EXCEPT}/{@code MINUS}) and before the
020 * {@code SELECT} that follows the operator. So
021 * {@code SELECT 1 UNION ALL SELECT 2} becomes:
022 *
023 * <pre>
024 * SELECT 1
025 * UNION ALL
026 * SELECT 2
027 * </pre>
028 *
029 * <p>The break goes before the operator's first word only, so a multi-word
030 * operator like {@code UNION ALL} stays on one line ({@code ALL} carries no
031 * break). Priority {@link LayoutPriorities#CLAUSE_LINEBREAK}. Iterative;
032 * read-only over tokens.
033 *
034 * <p>Plan reference: §7.3/S25, §7.4/S25.
035 */
036public final class SetOperatorRules implements LayoutRule {
037
038    private static final Set<String> SET_OPS;
039    static {
040        Set<String> s = new HashSet<String>(Arrays.asList(
041            "UNION", "INTERSECT", "EXCEPT", "MINUS"));
042        SET_OPS = Collections.unmodifiableSet(s);
043    }
044
045    @Override
046    public int priority() { return LayoutPriorities.CLAUSE_LINEBREAK; }
047
048    @Override
049    public String name() { return "SetOperatorRules"; }
050
051    @Override
052    public void apply(LayoutContext context) {
053        Pp2TokenStream stream = context.getStream();
054        int n = stream.size();
055        for (int i = 1; i < n; i++) {
056            Pp2Token t = stream.get(i);
057            if (isSetOp(stream, i)) {
058                // Break before the operator (first word of UNION [ALL], etc.).
059                context.requestLinebreaksBefore(i, 1);
060                context.requestBlanksBefore(i, 0);
061            } else if (t.hasRole(TokenRole.KEYWORD_MASTER) && isSelect(t)
062                    && followsSetOp(stream, i)) {
063                // Break before the SELECT that begins the next set-operation arm.
064                context.requestLinebreaksBefore(i, 1);
065                context.requestBlanksBefore(i, 0);
066            }
067        }
068    }
069
070    /**
071     * A SELECT preceded — skipping comments and {@code ALL}/{@code DISTINCT} —
072     * by a real set operator (a bare arm like {@code UNION ALL SELECT ...}).
073     *
074     * <p>Parenthesised arms ({@code UNION ALL (SELECT ...)}) are intentionally
075     * <i>not</i> broken here: the inner SELECT is a subquery whose layout (and
076     * the spacing of the wrapping paren) is owned by the JOIN/paren rules (S26)
077     * and the subquery/indent rules (S28). Forcing a break before the inner
078     * SELECT here would produce {@code all(\nselect} before those rules run.
079     */
080    private static boolean followsSetOp(Pp2TokenStream stream, int i) {
081        for (int j = i - 1; j >= 0; j--) {
082            Pp2Token p = stream.get(j);
083            ETokenType type = p.getSourceToken().tokentype;
084            if (isComment(type)) continue;
085            if (type == ETokenType.ttkeyword) {
086                String u = upper(p);
087                if ("ALL".equals(u) || "DISTINCT".equals(u)) continue;
088                return SET_OPS.contains(u);
089            }
090            return false;
091        }
092        return false;
093    }
094
095    /**
096     * Whether the keyword at {@code i} is a real set operator (not, e.g.,
097     * BigQuery's {@code SELECT * EXCEPT(col)} column-exclusion). A real set
098     * operator is either not immediately followed by {@code (}, or its
099     * following {@code (} begins a parenthesised query ({@code (SELECT ...)} /
100     * {@code (WITH ...)}).
101     */
102    private static boolean isSetOp(Pp2TokenStream stream, int i) {
103        Pp2Token t = stream.get(i);
104        if (t.getSourceToken().tokentype != ETokenType.ttkeyword) return false;
105        if (!SET_OPS.contains(upper(t))) return false;
106        int next = nextSolid(stream, i);
107        if (next < 0) return true; // trailing operator; harmless to break before
108        Pp2Token nt = stream.get(next);
109        if (nt.getSourceToken().tokentype == ETokenType.ttleftparenthesis) {
110            int after = nextSolid(stream, next);
111            if (after < 0) return false;
112            String u = upper(stream.get(after));
113            return "SELECT".equals(u) || "WITH".equals(u);
114        }
115        return true; // followed by SELECT / ALL / DISTINCT / etc.
116    }
117
118    private static int nextSolid(Pp2TokenStream stream, int from) {
119        for (int j = from + 1; j < stream.size(); j++) {
120            if (!isComment(stream.get(j).getSourceToken().tokentype)) return j;
121        }
122        return -1;
123    }
124
125    private static boolean isSelect(Pp2Token t) {
126        return t.getSourceToken().tokentype == ETokenType.ttkeyword
127            && "SELECT".equals(upper(t));
128    }
129
130    private static String upper(Pp2Token t) {
131        String s = t.getText();
132        return s == null ? "" : s.toUpperCase(Locale.ROOT);
133    }
134
135    private static boolean isComment(ETokenType type) {
136        return type == ETokenType.ttsimplecomment
137            || type == ETokenType.ttbracketedcomment
138            || type == ETokenType.ttCPPComment;
139    }
140}