001package gudusoft.gsqlparser.parser; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.EErrorType; 005import gudusoft.gsqlparser.EFindSqlStateType; 006import gudusoft.gsqlparser.ESqlStatementType; 007import gudusoft.gsqlparser.ETokenType; 008import gudusoft.gsqlparser.PowerQueryGrammarParser; 009import gudusoft.gsqlparser.PowerQueryLexer; 010import gudusoft.gsqlparser.TBaseType; 011import gudusoft.gsqlparser.TCustomLexer; 012import gudusoft.gsqlparser.TCustomParser; 013import gudusoft.gsqlparser.TCustomSqlStatement; 014import gudusoft.gsqlparser.TSourceToken; 015import gudusoft.gsqlparser.TSourceTokenList; 016import gudusoft.gsqlparser.TStatementList; 017import gudusoft.gsqlparser.TSyntaxError; 018import gudusoft.gsqlparser.parser.powerquery.PowerQueryTokenizer; 019import gudusoft.gsqlparser.sqlcmds.SqlCmdsFactory; 020import gudusoft.gsqlparser.stmt.TUnknownSqlStatement; 021import gudusoft.gsqlparser.stmt.powerquery.TPowerQueryDocumentStmt; 022 023import java.io.BufferedReader; 024import java.io.IOException; 025 026/** 027 * Power Query M-Language SQL parser (Tier 1 + Tier 2 extractor). 028 * 029 * <p>Template-method implementation of {@link AbstractSqlParser} that 030 * treats every M input as a <b>single</b> document statement. M is 031 * expression-oriented — there is no meaningful multi-statement split — 032 * so {@link #extractVendorRawStatements} always produces exactly one 033 * {@link TPowerQueryDocumentStmt} per input. 034 * 035 * <p>Tokenization is done by {@link PowerQueryTokenizer} (hand-written), 036 * not by a generated DFA table. The document parser 037 * ({@link gudusoft.gsqlparser.parser.powerquery.PowerQueryDocumentParser}) 038 * runs inside {@link TPowerQueryDocumentStmt#doParseStatement} and walks 039 * the token list to build the step graph. 040 * 041 * <p>Lineage analysis is delegated to {@code TPowerQueryAnalyzer} in 042 * {@code dlineage/impl/powerquery/}, which is wired up through 043 * {@code DataFlowAnalyzerFactory}. 044 */ 045public class PowerQuerySqlParser extends AbstractSqlParser { 046 047 public PowerQueryLexer flexer; 048 private final PowerQueryGrammarParser fparser; 049 private TCustomSqlStatement gcurrentsqlstatement; 050 051 public PowerQuerySqlParser() { 052 super(EDbVendor.dbvpowerquery); 053 this.delimiterChar = ';'; 054 this.defaultDelimiterStr = ";"; 055 056 this.flexer = new PowerQueryLexer(); 057 this.flexer.delimiterchar = this.delimiterChar; 058 this.flexer.defaultDelimiterStr = this.defaultDelimiterStr; 059 this.lexer = this.flexer; 060 061 this.fparser = new PowerQueryGrammarParser(null); 062 this.fparser.lexer = this.flexer; 063 } 064 065 // ========== Abstract methods ========== 066 067 @Override 068 protected TCustomLexer getLexer(ParserContext context) { 069 return this.flexer; 070 } 071 072 @Override 073 protected TCustomParser getParser(ParserContext context, TSourceTokenList tokens) { 074 this.fparser.sourcetokenlist = tokens; 075 return this.fparser; 076 } 077 078 @Override 079 protected TCustomParser getSecondaryParser(ParserContext context, TSourceTokenList tokens) { 080 return null; 081 } 082 083 @Override 084 protected void tokenizeVendorSql() { 085 dopowerquerytexttotokenlist(); 086 } 087 088 @Override 089 protected void setupVendorParsersForExtraction() { 090 this.fparser.sqlcmds = this.sqlcmds; 091 this.fparser.sourcetokenlist = this.sourcetokenlist; 092 } 093 094 @Override 095 protected void extractVendorRawStatements(SqlParseResult.Builder builder) { 096 dopowerquerygetrawsqlstatements(builder); 097 } 098 099 // ========== Power-Query-specific tokenization ========== 100 101 /** 102 * Hand-written M tokenizer path. Reads everything the 103 * {@link AbstractSqlParser} reader has buffered for us and populates 104 * {@link #sourcetokenlist} directly — sidestepping the DFA-driven 105 * {@code yylex} loop that every other vendor uses. 106 */ 107 private void dopowerquerytexttotokenlist() { 108 try { 109 BufferedReader input = this.flexer.yyinput; 110 StringBuilder sb = new StringBuilder(); 111 if (input != null) { 112 char[] buf = new char[4096]; 113 int n; 114 while ((n = input.read(buf)) != -1) { 115 sb.append(buf, 0, n); 116 } 117 } 118 PowerQueryTokenizer tok = new PowerQueryTokenizer(sb.toString()); 119 tok.tokenizeInto(this.sourcetokenlist); 120 } catch (IOException ioe) { 121 this.syntaxErrors.add(new TSyntaxError("", 0, 0, 122 "Failed to read Power Query input: " + ioe.getMessage(), 123 EErrorType.sperror, TBaseType.MSG_WARNING_ERROR_WHEN_TOKENIZE, 124 null, 0)); 125 } 126 } 127 128 // ========== Raw statement extraction ========== 129 130 /** 131 * M is expression-oriented: the whole input is one document 132 * statement. We still route through the 133 * {@link EFindSqlStateType} state machine so diagnostics and the 134 * {@link #onRawStatementComplete} hook behave like every other 135 * vendor. 136 */ 137 private void dopowerquerygetrawsqlstatements(SqlParseResult.Builder builder) { 138 gcurrentsqlstatement = null; 139 EFindSqlStateType gst = EFindSqlStateType.stnormal; 140 141 for (int i = 0; i < sourcetokenlist.size(); i++) { 142 TSourceToken tok = sourcetokenlist.get(i); 143 sourcetokenlist.curpos = i; 144 145 if (isTrivia(tok)) { 146 if (gcurrentsqlstatement != null) { 147 gcurrentsqlstatement.sourcetokenlist.add(tok); 148 } 149 continue; 150 } 151 152 if (gst == EFindSqlStateType.stnormal) { 153 gcurrentsqlstatement = new TPowerQueryDocumentStmt(vendor); 154 gcurrentsqlstatement.sourcetokenlist.add(tok); 155 gst = EFindSqlStateType.stsql; 156 } else { 157 gcurrentsqlstatement.sourcetokenlist.add(tok); 158 } 159 } 160 161 if (gcurrentsqlstatement != null) { 162 onRawStatementComplete(parserContext, gcurrentsqlstatement, fparser, 163 null, sqlstatements, true, builder); 164 } else { 165 // Empty input — emit an unknown statement so downstream code has 166 // something to look at. 167 TUnknownSqlStatement empty = new TUnknownSqlStatement(vendor); 168 empty.sqlstatementtype = ESqlStatementType.sstunknown; 169 onRawStatementComplete(parserContext, empty, fparser, null, 170 sqlstatements, true, builder); 171 } 172 builder.sqlStatements(this.sqlstatements); 173 } 174 175 @Override 176 protected TStatementList performParsing(ParserContext context, 177 TCustomParser parser, 178 TCustomParser secondaryParser, 179 TSourceTokenList tokens, 180 TStatementList rawStatements) { 181 this.sourcetokenlist = tokens; 182 this.parserContext = context; 183 this.sqlstatements = rawStatements; 184 185 if (this.sqlcmds == null) { 186 this.sqlcmds = SqlCmdsFactory.get(vendor); 187 } 188 this.fparser.sqlcmds = this.sqlcmds; 189 190 initializeGlobalContext(); 191 192 for (int i = 0; i < sqlstatements.size(); i++) { 193 TCustomSqlStatement stmt = sqlstatements.getRawSql(i); 194 try { 195 stmt.setFrameStack(frameStack); 196 int parseResult = stmt.parsestatement(null, false, 197 context.isOnlyNeedRawParseTree()); 198 if (parseResult != 0 || stmt.getErrorCount() > 0) { 199 copyErrorsFromStatement(stmt); 200 } 201 } catch (Exception ex) { 202 handleStatementParsingException(stmt, i, ex); 203 } 204 } 205 206 if (globalFrame != null) globalFrame.popMeFromStack(frameStack); 207 208 return sqlstatements; 209 } 210 211 // ========== helpers ========== 212 213 private boolean isTrivia(TSourceToken tok) { 214 if (tok == null) return true; 215 if (tok.tokentype == ETokenType.ttwhitespace) return true; 216 if (tok.tokentype == ETokenType.ttreturn) return true; 217 if (tok.tokentype == ETokenType.ttsimplecomment) return true; 218 if (tok.tokentype == ETokenType.ttbracketedcomment) return true; 219 if (tok.tokencode == TBaseType.lexspace) return true; 220 if (tok.tokencode == TBaseType.lexnewline) return true; 221 return tok.tokencode == TBaseType.cmtdoublehyphen 222 || tok.tokencode == TBaseType.cmtslashstar; 223 } 224}