001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.EErrorType; 005import gudusoft.gsqlparser.EOBTenantMode; 006import gudusoft.gsqlparser.TStatementList; 007import gudusoft.gsqlparser.TSyntaxError; 008import gudusoft.gsqlparser.sqlcmds.TSqlCmdsOceanbase; 009 010/** 011 * OceanBase SQL parser — Phase 1 delegation skeleton. 012 * 013 * <p>OceanBase is a multi-tenant database whose SQL surface depends on the 014 * tenant kind. Each tenant is permanently MySQL-compatible, Oracle-compatible, 015 * or the system ({@code sys}) tenant. This parser is a thin 016 * {@link SqlParser} adapter that selects a delegate based on the 017 * {@link EOBTenantMode} carried inside {@link ParserContext} (mirrored from 018 * {@code TGSqlParser.setOBTenantMode}). 019 * 020 * <h2>Phase 1 scope</h2> 021 * 022 * <p>This class implements {@link SqlParser} directly rather than extending 023 * {@link AbstractSqlParser}. The reason is that {@link AbstractSqlParser#parse} 024 * is {@code final} and the template method orchestrates state on the 025 * subclass instance ({@code sourcetokenlist}, {@code sqlstatements}, 026 * {@code lexer}, {@code parserContext}, etc.). Splitting that state across 027 * an OceanBase wrapper plus a wrapped MySQL/Oracle delegate would be a 028 * concurrency-unsafe nightmare and defeat the purpose of the abstract 029 * template. By implementing the small {@link SqlParser} interface directly, 030 * Phase 1 forwards the entire parse cycle to a single delegate as an atomic 031 * unit, which is both simpler and easier to reason about. 032 * 033 * <p>The delegate's lexer/parser/AST machinery is reused as-is. AST nodes 034 * built during delegation report {@code dbvoceanbase} as their vendor 035 * because {@code TGSqlParser.doDelegatedRawParse} re-binds the 036 * NodeFactory back-reference to the OceanBase-configured {@link 037 * gudusoft.gsqlparser.TGSqlParser} after the vendor parser returns. This is 038 * the same fixup mechanism existing delegated vendors rely on. 039 * 040 * <h2>What Phase 1 does NOT do</h2> 041 * 042 * <ul> 043 * <li>Does NOT parse OceanBase-specific syntax such as 044 * {@code CREATE TENANT}, {@code ALTER SYSTEM}, OB hint payloads, 045 * OB partition syntax, tablegroups, outlines, or global indexes. 046 * OB-specific syntax is added incrementally in Phase 4 after the 047 * grammar forks land in Phases 2/3.</li> 048 * <li>Does NOT auto-detect tenant mode from SQL content. Mode is 049 * strictly explicit via 050 * {@link gudusoft.gsqlparser.TGSqlParser#setOBTenantMode}. Boundary 051 * detection of admin prefixes is handled by 052 * {@code TSqlCmdsOceanbase} for statement splitting only.</li> 053 * <li>Does NOT switch modes per-statement. One mode per 054 * {@link gudusoft.gsqlparser.TGSqlParser} instance — OceanBase 055 * tenants are immutable in mode at creation time on the server side.</li> 056 * </ul> 057 * 058 * <h2>Promotion status</h2> 059 * 060 * <p>Phase 2 — landed: {@link EOBTenantMode#MYSQL} and 061 * {@link EOBTenantMode#SYSTEM} route to {@link OceanBaseMysqlSqlParser} 062 * (the forked MySQL grammar). 063 * 064 * <p>Phase 3 — landed: {@link EOBTenantMode#ORACLE} routes to 065 * {@link OceanBaseOracleSqlParser} (the forked Oracle dual grammar 066 * with full PL/SQL support). The public contract of this class does 067 * not change across these promotions. 068 * 069 * @see SqlParser 070 * @see EOBTenantMode 071 * @see gudusoft.gsqlparser.TGSqlParser#setOBTenantMode 072 * @since 4.0.1.4 073 */ 074public class OceanBaseSqlParser implements SqlParser { 075 076 /** 077 * Lazily-constructed delegate for {@link EOBTenantMode#MYSQL} and 078 * {@link EOBTenantMode#SYSTEM}. As of Phase 2 this is an 079 * {@link OceanBaseMysqlSqlParser} (forked MySQL grammar) rather than 080 * the unmodified {@link MySqlSqlParser} that Phase 1 used. 081 */ 082 private OceanBaseMysqlSqlParser mysqlDelegate; 083 084 /** 085 * Lazily-constructed delegate for {@link EOBTenantMode#ORACLE}. As 086 * of Phase 3 this is an {@link OceanBaseOracleSqlParser} (forked 087 * Oracle dual grammar) rather than the unmodified 088 * {@link OracleSqlParser} that Phases 1 and 2 used. 089 */ 090 private OceanBaseOracleSqlParser oracleDelegate; 091 092 /** 093 * Construct an OceanBase parser. Delegates are created lazily on the 094 * first parse call so a parser configured for {@code dbvoceanbase} 095 * but never actually used does not pay any allocation cost. 096 */ 097 public OceanBaseSqlParser() { 098 } 099 100 @Override 101 public EDbVendor getVendor() { 102 return EDbVendor.dbvoceanbase; 103 } 104 105 @Override 106 public SqlParseResult parse(ParserContext context) { 107 SqlParseResult earlyError = checkSystemPrefixInOracleMode(context); 108 if (earlyError != null) { 109 return earlyError; 110 } 111 return delegate(context).parse(context); 112 } 113 114 @Override 115 public SqlParseResult tokenize(ParserContext context) { 116 // Intentionally NOT gated by the system-prefix check: pure 117 // tokenization is always legal regardless of tenant mode. A caller 118 // that only wants the token stream should always get one. 119 return delegate(context).tokenize(context); 120 } 121 122 @Override 123 public SqlParseResult getrawsqlstatements(ParserContext context) { 124 // TGSqlParser.doparse() reaches the OceanBase parser through this 125 // method (via doDelegatedRawParse), so the mode gate must be here 126 // too — not only in parse() — or the diagnostic would fire only 127 // on direct SqlParser.parse() callers and miss the TGSqlParser 128 // entry point. 129 SqlParseResult earlyError = checkSystemPrefixInOracleMode(context); 130 if (earlyError != null) { 131 return earlyError; 132 } 133 return delegate(context).getrawsqlstatements(context); 134 } 135 136 /** 137 * When the active tenant mode is {@link EOBTenantMode#ORACLE}, scan 138 * the incoming SQL text for an unmistakable OceanBase system-tenant 139 * DDL prefix ({@code CREATE/ALTER/DROP TENANT}, 140 * {@code CREATE/ALTER/DROP RESOURCE POOL|UNIT}). If one is found, 141 * short-circuit with a targeted error rather than letting the Oracle 142 * grammar produce a confusing raw syntax error at the {@code TENANT} 143 * or {@code RESOURCE} token. 144 * 145 * <p>Per ADR-7 the diagnostic never auto-promotes mode — the caller 146 * must explicitly switch the parser via {@code setOBTenantMode(SYSTEM)} 147 * to parse such scripts. See {@code doc/oceanbase/tenant_mode_usage.md} 148 * for the recommended two-pass parsing pattern. 149 * 150 * @param context the current parser context 151 * @return an error {@link SqlParseResult} when a conflict is 152 * detected, or {@code null} when the script is safe to 153 * forward to the Oracle delegate 154 */ 155 private SqlParseResult checkSystemPrefixInOracleMode(ParserContext context) { 156 EOBTenantMode mode = context.getOceanBaseTenantMode(); 157 if (mode != EOBTenantMode.ORACLE) { 158 return null; 159 } 160 String conflict = TSqlCmdsOceanbase.detectSystemPrefixConflict(context.getSqlText()); 161 if (conflict == null) { 162 return null; 163 } 164 String message = "OceanBase: `" + conflict + "` is a system-tenant statement" 165 + " and cannot be parsed while the tenant mode is ORACLE." 166 + " System-tenant DDL is only valid against the OceanBase `sys` tenant," 167 + " which is lexically MySQL-family. Parse this statement with a" 168 + " separate TGSqlParser instance after calling" 169 + " setOBTenantMode(EOBTenantMode.SYSTEM), then keep the Oracle-mode" 170 + " parser for the tenant-local statements." 171 + " See gsp_java_core/doc/oceanbase/tenant_mode_usage.md for the" 172 + " recommended two-pass parsing pattern."; 173 174 TSyntaxError err = new TSyntaxError( 175 conflict, 176 1L, 177 1L, 178 message, 179 EErrorType.sperror, 180 0, 181 null, 182 -1 183 ); 184 185 return new SqlParseResult.Builder() 186 .errorCode(1) 187 .errorMessage(message) 188 .addSyntaxError(err) 189 .sqlStatements(new TStatementList()) 190 .build(); 191 } 192 193 /** 194 * Pick the right delegate for the current tenant mode. 195 * 196 * <p>{@link EOBTenantMode#SYSTEM} reuses the MySQL delegate because the 197 * system-tenant SQL surface is lexically MySQL-family (backticks, 198 * {@code ;} delimiter, no PL/SQL). The few system-only DDL forms 199 * ({@code CREATE TENANT}, {@code ALTER SYSTEM}, etc.) are recognized 200 * for boundary detection by {@code TSqlCmdsOceanbase}; full grammar 201 * support arrives in Phase 4. 202 * 203 * @param context the immutable parser context (carries the mirrored 204 * {@link EOBTenantMode} from {@code TGSqlParser}) 205 * @return the {@link MySqlSqlParser} or {@link OracleSqlParser} 206 * delegate; never null 207 */ 208 private SqlParser delegate(ParserContext context) { 209 EOBTenantMode mode = context.getOceanBaseTenantMode(); 210 if (mode == null) { 211 mode = EOBTenantMode.MYSQL; 212 } 213 switch (mode) { 214 case ORACLE: 215 if (oracleDelegate == null) { 216 oracleDelegate = new OceanBaseOracleSqlParser(); 217 } 218 return oracleDelegate; 219 case MYSQL: 220 case SYSTEM: 221 default: 222 if (mysqlDelegate == null) { 223 mysqlDelegate = new OceanBaseMysqlSqlParser(); 224 } 225 return mysqlDelegate; 226 } 227 } 228}