001package gudusoft.gsqlparser.pp2.dialect;
002
003import gudusoft.gsqlparser.EDbVendor;
004
005import java.util.Set;
006
007/**
008 * Per-dialect layout behaviour. The lexical island pipeline consults the
009 * strategy for the (small) set of things that genuinely vary by vendor, instead
010 * of branching on {@link EDbVendor} throughout the rules. The
011 * {@link DialectRegistry} maps a vendor to its strategy ({@link GenericDialect}
012 * by default).
013 *
014 * <p>v2 keeps this surface deliberately small. New per-dialect behaviour is
015 * added only when a smoke test demands it (plan §7.5 #15); most dialects use
016 * {@link GenericDialect}.
017 *
018 * <p>Plan reference: §7.3/S30, §7.4/S30, §6 dialect/*.
019 */
020public interface DialectStrategy {
021
022    /** The vendor this strategy serves. */
023    EDbVendor getVendor();
024
025    /**
026     * Upper-cased clause-start <i>phrases</i> for this dialect, beyond the
027     * universal clause keywords (FROM/WHERE/GROUP/ORDER/HAVING/...). Each entry
028     * is one or more space-separated words; the clause-linebreak rule breaks
029     * before a phrase only when the whole phrase matches consecutive tokens (so
030     * {@code "CONNECT BY"} breaks before {@code CONNECT}, but a bare
031     * {@code CONNECT} does not). For example Oracle adds {@code "CONNECT BY"} /
032     * {@code "START WITH"} and Snowflake adds {@code "QUALIFY"}.
033     * {@link GenericDialect} returns an empty set.
034     */
035    Set<String> additionalClauseKeywords();
036}