package gudusoft.gsqlparser.dlineage;

import gudusoft.gsqlparser.*;
import gudusoft.gsqlparser.dlineage.dataflow.model.Option;
import gudusoft.gsqlparser.dlineage.dataflow.model.SqlInfo;
import gudusoft.gsqlparser.dlineage.statistics.*;
import gudusoft.gsqlparser.nodes.TJoin;
import gudusoft.gsqlparser.nodes.TJoinItem;
import gudusoft.gsqlparser.nodes.TJoinItemList;
import gudusoft.gsqlparser.nodes.TJoinList;
import gudusoft.gsqlparser.stmt.*;
import gudusoft.gsqlparser.util.Logger;
import gudusoft.gsqlparser.util.LoggerFactory;
import gudusoft.gsqlparser.util.SQLUtil;
import gudusoft.gsqlparser.util.json.JSON;

import java.io.File;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * SQL file statistics utility class for analyzing various statistical information in SQL files.
 * Supports both multi-file statistics and single SQL statement statistics modes.
 *
 * Single SQL statistics functionality:
 * 1. Create an instance using constructor SQLFileStatistics(String sqlContent, Option option)
 * 2. Call generateSQLQueryStatistics() method to get statistics results
 * 3. Or use the static convenience method analyzeSingleSQL(String sqlContent, Option option)
 *
 * Single SQL statistics features:
 * - File size: counts the number of characters in SQL content
 * - Line count: counts the number of lines in SQL content
 * - Other statistics: same as file statistics, including statement types, JOINs, subqueries, etc.
 *
 * Usage example:
 * <pre>
 * String sql = "SELECT * FROM table WHERE id = 1";
 * Option option = new Option();
 * option.setVendor(EDbVendor.dbvmysql);
 *
 * // Method 1: Using static method
 * String result = SQLFileStatistics.analyzeSingleSQL(sql, option);
 *
 * // Method 2: Using instance method
 * SQLFileStatistics analyzer = new SQLFileStatistics(sql, option);
 * String result = analyzer.generateSQLQueryStatistics();
 * </pre>
 */
public class SQLFileStatistics {
    private static final Logger logger = LoggerFactory.getLogger(SQLFileStatistics.class);

    /**
     * Matches split file names of the form
     * {@code original_filename_number_number_number.extension}.
     * Compiled once because the pattern never changes.
     */
    private static final Pattern SPLIT_FILE_NAME_PATTERN =
            Pattern.compile("^(.*?)_\\d+_\\d+_\\d+\\.(.*)$");

    /** Table kinds that mark a CREATE TABLE / CREATE VIEW target as temporary. */
    private static final List<ETableKind> TEMPORARY_TABLE_KINDS = Collections.unmodifiableList(Arrays.asList(
            ETableKind.etkTemporary,
            ETableKind.etkTemp,
            ETableKind.etkLocalTemporary,
            ETableKind.etkLocalTemp,
            ETableKind.etkGlobalTemporary,
            ETableKind.etkGlobalTemp));

    private final SqlInfo[] sqlInfos;
    private final Option option;
    private final List<FileStatistics> fileStatisticsList = new ArrayList<>();

    /**
     * Holder for the visitors applied to every statement. One instance is created per
     * analysis run and each visitor is reset before it is applied to a statement.
     */
    private static final class StatementVisitors {
        final SubqueryDepthVisitor subqueryDepth = new SubqueryDepthVisitor();
        final CaseVisitor caseExpressions = new CaseVisitor();
        final WindowFunctionVisitor windowFunctions = new WindowFunctionVisitor();
        final AggregateFunctionVisitor aggregateFunctions = new AggregateFunctionVisitor();
        final TableReferenceVisitor tableReferences = new TableReferenceVisitor();
        final WherePredicateVisitor wherePredicates = new WherePredicateVisitor();
    }

    /**
     * Create an analyzer over pre-built SQL descriptors.
     * A null array is treated as empty; a null element is replaced by a descriptor with
     * empty SQL. The origin index of every element is set to its position in the array.
     *
     * @param sqlInfos SQL descriptors to analyze, may be null
     * @param option   Analysis options (the vendor is read when statistics are generated)
     */
    public SQLFileStatistics(SqlInfo[] sqlInfos, Option option) {
        if (sqlInfos == null) {
            this.sqlInfos = new SqlInfo[0];
        } else {
            this.sqlInfos = new SqlInfo[sqlInfos.length];
            for (int i = 0; i < sqlInfos.length; i++) {
                SqlInfo info = sqlInfos[i];
                if (info == null) {
                    info = new SqlInfo();
                    info.setSql("");
                }
                info.setOriginIndex(i);
                this.sqlInfos[i] = info;
            }
        }
        this.option = option;
    }

    /**
     * Create an analyzer for a single piece of SQL text.
     *
     * @param sqlContent SQL content to analyze
     * @param option     Analysis options (the vendor is read when statistics are generated)
     */
    public SQLFileStatistics(String sqlContent, Option option) {
        SqlInfo info = new SqlInfo();
        info.setSql(sqlContent);
        info.setOriginIndex(0);
        this.sqlInfos = new SqlInfo[]{info};
        this.option = option;
    }

    /**
     * Create an analyzer over SQL files; the content of each file is read immediately.
     * A null array is treated as empty and a null element becomes a descriptor with
     * empty SQL, mirroring the behavior of the SqlInfo[] constructor.
     *
     * @param sqlFiles SQL files to analyze, may be null
     * @param option   Analysis options (the vendor is read when statistics are generated)
     */
    public SQLFileStatistics(File[] sqlFiles, Option option) {
        int fileCount = sqlFiles == null ? 0 : sqlFiles.length;
        SqlInfo[] infos = new SqlInfo[fileCount];
        for (int i = 0; i < fileCount; i++) {
            File sqlFile = sqlFiles[i];
            SqlInfo info = new SqlInfo();
            if (sqlFile == null) {
                info.setSql("");
            } else {
                info.setSql(SQLUtil.getFileContent(sqlFile));
                info.setFileName(sqlFile.getName());
                info.setFilePath(sqlFile.getAbsolutePath());
            }
            info.setOriginIndex(0);
            infos[i] = info;
        }
        this.sqlInfos = infos;
        this.option = option;
    }

    /**
     * Merge analysis results from another SQLFileStatistics object into the current object.
     * Statistics for a file already known to this object are combined; statistics for an
     * unknown file are added to this object's list by reference (no copy is made).
     *
     * @param other The SQLFileStatistics object to merge
     */
    public void merge(SQLFileStatistics other) {
        if (other == null || other.fileStatisticsList == null || other.fileStatisticsList.isEmpty()) {
            return;
        }

        // Index the current statistics by file for quick lookup
        Map<String, FileStatistics> currentFileStatsMap = new HashMap<>();
        for (FileStatistics stats : this.fileStatisticsList) {
            currentFileStatsMap.put(stats.getFile(), stats);
        }

        for (FileStatistics otherStats : other.fileStatisticsList) {
            FileStatistics currentStats = currentFileStatsMap.get(otherStats.getFile());
            if (currentStats != null) {
                // Statistics for this file already exist: combine them
                mergeFileStatistics(currentStats, otherStats);
            } else {
                // First time this file is seen: adopt the other object's statistics
                this.fileStatisticsList.add(otherStats);
                currentFileStatsMap.put(otherStats.getFile(), otherStats);
            }
        }
    }

    /**
     * Merge two FileStatistics objects.
     * Counters are summed, the subquery depth takes the maximum of both values, and
     * join types, table references and created/read objects are combined.
     *
     * @param target Target FileStatistics object, where merged results will be stored
     * @param source Source FileStatistics object, which is only read by this method
     */
    private void mergeFileStatistics(FileStatistics target, FileStatistics source) {
        // Merge count-type statistics
        target.setTotal_statements(target.getTotal_statements() + source.getTotal_statements());
        target.setSelect_count(target.getSelect_count() + source.getSelect_count());
        target.setInsert_count(target.getInsert_count() + source.getInsert_count());
        target.setUpdate_count(target.getUpdate_count() + source.getUpdate_count());
        target.setDelete_count(target.getDelete_count() + source.getDelete_count());
        target.setMerge_count(target.getMerge_count() + source.getMerge_count());
        target.setCreate_table_count(target.getCreate_table_count() + source.getCreate_table_count());
        target.setCreate_temp_table_count(target.getCreate_temp_table_count() + source.getCreate_temp_table_count());
        target.setCtas_count(target.getCtas_count() + source.getCtas_count());
        target.setCreate_view_count(target.getCreate_view_count() + source.getCreate_view_count());
        target.setCreate_temp_view_count(target.getCreate_temp_view_count() + source.getCreate_temp_view_count());
        target.setDrop_count(target.getDrop_count() + source.getDrop_count());
        target.setTruncate_count(target.getTruncate_count() + source.getTruncate_count());
        target.setParse_error_count(target.getParse_error_count() + source.getParse_error_count());
        target.setJoin_count(target.getJoin_count() + source.getJoin_count());

        // Merge subquery depth (take maximum value)
        if (source.getSubquery_depth() > target.getSubquery_depth()) {
            target.setSubquery_depth(source.getSubquery_depth());
        }

        // Merge other counts
        target.setCte_count(target.getCte_count() + source.getCte_count());
        target.setCase_count(target.getCase_count() + source.getCase_count());
        target.setUnion_count(target.getUnion_count() + source.getUnion_count());
        target.setWindow_function_count(target.getWindow_function_count() + source.getWindow_function_count());
        target.setAggregate_function_count(target.getAggregate_function_count() + source.getAggregate_function_count());
        target.setWhere_predicate_count(target.getWhere_predicate_count() + source.getWhere_predicate_count());

        // Merge JOIN type statistics
        for (Map.Entry<String, Integer> entry : source.getJoin_types().entrySet()) {
            String joinType = entry.getKey();
            int count = entry.getValue();
            target.getJoin_types().put(joinType, target.getJoin_types().getOrDefault(joinType, 0) + count);
        }

        // Merge table references
        target.addTableReferences(source.getDistinct_table_references());

        // Merge created objects, skipping names the target already lists
        for (String object : source.getObjects_created()) {
            if (!target.getObjects_created().contains(object)) {
                target.addObject_created(object);
            }
        }

        // Merge read objects
        for (String object : source.getObjects_read()) {
            target.addObject_read(object);
        }
    }

    /**
     * Extract original file name from split file name
     * Split file name format: original_filename_file_index_start_line_end_line.extension
     *
     * @param filename Split file name
     * @return Original file name, return the given file name unchanged if it's not a split file
     */
    private String getOriginalFileName(String filename) {
        if (filename == null || filename.isEmpty()) {
            return filename;
        }

        Matcher matcher = SPLIT_FILE_NAME_PATTERN.matcher(filename);
        if (matcher.matches()) {
            // Split file: rebuild "originalFilename.extension"
            return matcher.group(1) + "." + matcher.group(2);
        }

        // Not a split file
        return filename;
    }

    /**
     * Extract original file path from the path of a (possibly split) file
     *
     * @param filePath File path
     * @return Absolute path of the original file when the name has the split-file format,
     *         otherwise the given file path unchanged
     */
    private String getOriginalFilePath(String filePath) {
        if (filePath == null || filePath.isEmpty()) {
            return filePath;
        }

        File file = new File(filePath);
        String filename = file.getName();
        String originalFilename = getOriginalFileName(filename);

        if (originalFilename.equals(filename)) {
            // Not a split file: keep the path exactly as given
            return filePath;
        }

        // Split file: same directory, original file name
        return new File(file.getParent(), originalFilename).getAbsolutePath();
    }

    /**
     * Generate statistics for the first SQL entry held by this object.
     * The SQL length is reported as "file size" and the number of "\n"-separated
     * segments as "line count".
     *
     * @return JSON string with a "file_statistics" entry, or an empty JSON map when there
     *         is no SQL entry or the first entry is null/blank
     */
    public synchronized String generateSQLQueryStatistics() {
        if (sqlInfos == null || sqlInfos.length == 0) {
            return JSON.toJSONString(Collections.emptyMap());
        }

        // Only the first SQL entry is analyzed in single-SQL mode
        SqlInfo sqlInfo = sqlInfos[0];
        if (sqlInfo == null || sqlInfo.getSql() == null || sqlInfo.getSql().trim().isEmpty()) {
            return JSON.toJSONString(Collections.emptyMap());
        }

        FileStatistics sqlStats = new FileStatistics("sql_query");

        // For a single SQL, use the content length as "file size" and its line count as "line count"
        String sqlContent = sqlInfo.getSql();
        sqlStats.setFile_size(sqlContent.length());
        sqlStats.setLine_count(sqlContent.split("\n").length);

        StatementVisitors visitors = new StatementVisitors();
        TGSqlParser sqlparser = new TGSqlParser(option.getVendor());
        sqlparser.sqltext = sqlContent;

        int parseResult = sqlparser.parse();
        if (parseResult != 0) {
            logger.warn("SQL parse error: " + sqlparser.getErrormessage());
            sqlStats.setParse_error_count(sqlparser.getErrorCount());
        }

        // Statements available after parsing are counted even when errors were reported
        collectParsedStatements(sqlStats, sqlparser, visitors);

        Map<String, Object> result = new LinkedHashMap<>();
        result.put("file_statistics", sqlStats.toJSON());
        return JSON.toJSONString(result);
    }

    /**
     * Static method: Conveniently analyze a single SQL statement
     *
     * @param sqlContent SQL content to analyze
     * @param option     Analysis options
     * @return Statistical results in JSON format
     */
    public static String analyzeSingleSQL(String sqlContent, Option option) {
        SQLFileStatistics analyzer = new SQLFileStatistics(sqlContent, option);
        return analyzer.generateSQLQueryStatistics();
    }

    /**
     * Generate statistics for every file held by this object. SQL entries are grouped by
     * original file (split files are folded back into the file they came from); entries
     * without a path or name are grouped under "anonymous.sql".
     * <p>
     * Each call appends the computed per-file statistics to this object's internal list,
     * and the returned JSON contains everything in that list, including entries added by
     * earlier calls or by {@link #merge(SQLFileStatistics)}.
     *
     * @return JSON string with a "file_statistics" array
     */
    public synchronized String generateFileStatistics() {
        if (sqlInfos == null) {
            return JSON.toJSONString(Collections.emptyMap());
        }

        // Group statistics by file - identify and merge split files
        Map<String, List<SqlInfo>> fileSqlInfoMap = Arrays.stream(sqlInfos)
                .filter(info -> info != null && info.getSql() != null && !info.getSql().trim().isEmpty())
                .collect(Collectors.groupingBy(this::resolveGroupingKey));

        StatementVisitors visitors = new StatementVisitors();
        // One parser instance is reused for every SQL entry
        TGSqlParser sqlparser = new TGSqlParser(option.getVendor());

        for (Map.Entry<String, List<SqlInfo>> entry : fileSqlInfoMap.entrySet()) {
            String filePath = entry.getKey();
            FileStatistics fileStats = new FileStatistics(filePath);

            // Collect file size and total line count
            collectSQLFileInfo(new File(filePath), fileStats, filePath);

            // Parse all SQL entries that belong to the file
            for (SqlInfo sqlInfo : entry.getValue()) {
                sqlparser.sqltext = sqlInfo.getSql();

                int parseResult = sqlparser.parse();
                if (parseResult != 0) {
                    logger.warn("File: " + filePath + " SQL parse error: " + sqlparser.getErrormessage());
                    fileStats.setParse_error_count(fileStats.getParse_error_count() + sqlparser.getErrorCount());
                }

                collectParsedStatements(fileStats, sqlparser, visitors);
            }

            fileStatisticsList.add(fileStats);
        }

        // Generate JSON output
        List<Object> fileStatsArray = new ArrayList<>();
        for (FileStatistics stat : fileStatisticsList) {
            fileStatsArray.add(stat.toJSON());
        }

        Map<String, Object> result = new LinkedHashMap<>();
        result.put("file_statistics", fileStatsArray);
        return JSON.toJSONString(result);
    }

    /**
     * Determine the key under which a SQL entry is grouped: the original file path when a
     * path is present, else the original file name, else "anonymous.sql".
     */
    private String resolveGroupingKey(SqlInfo info) {
        if (info.getFilePath() != null && !info.getFilePath().isEmpty()) {
            return getOriginalFilePath(info.getFilePath());
        }
        if (info.getFileName() != null && !info.getFileName().isEmpty()) {
            return getOriginalFileName(info.getFileName());
        }
        return "anonymous.sql";
    }

    /** Collect statistics for every statement currently held by the parser. */
    private void collectParsedStatements(FileStatistics stats, TGSqlParser sqlparser, StatementVisitors visitors) {
        int statementCount = sqlparser.sqlstatements.size();
        for (int i = 0; i < statementCount; i++) {
            collectStatementStatistics(stats, sqlparser.sqlstatements.get(i), visitors);
        }
    }

    /**
     * Collect all statistics for one top-level statement: statement type, joins, subquery
     * depth, CTEs, CASE expressions, unions, window/aggregate functions, table references
     * and WHERE predicates (of the statement and of its direct child statements).
     */
    private void collectStatementStatistics(FileStatistics stats, TCustomSqlStatement stmt, StatementVisitors visitors) {
        stats.incrementTotal_statements();

        countStatementType(stats, stmt);
        countJoins(stats, stmt);

        // Subquery depth: keep the maximum seen so far
        visitors.subqueryDepth.reset();
        stmt.acceptChildren(visitors.subqueryDepth);
        int subqueryDepth = visitors.subqueryDepth.getMaxDepth();
        if (subqueryDepth > stats.getSubquery_depth()) {
            stats.setSubquery_depth(subqueryDepth);
        }

        countCTEs(stats, stmt);

        // CASE expressions
        visitors.caseExpressions.reset();
        stmt.acceptChildren(visitors.caseExpressions);
        stats.addCase_count(visitors.caseExpressions.getCaseCount());

        countUnions(stats, stmt);

        // Window functions
        visitors.windowFunctions.reset();
        stmt.acceptChildren(visitors.windowFunctions);
        stats.addWindow_function_count(visitors.windowFunctions.getWindowFunctionCount());

        // Aggregate functions
        visitors.aggregateFunctions.reset();
        stmt.acceptChildren(visitors.aggregateFunctions);
        stats.addAggregate_function_count(visitors.aggregateFunctions.getAggregateFunctionCount());

        // Table references
        visitors.tableReferences.reset();
        stmt.acceptChildren(visitors.tableReferences);
        stats.addTableReferences(visitors.tableReferences.getTableReferences());

        // For SELECT statements, the referenced tables are also recorded as read objects
        if (stmt instanceof TSelectSqlStatement) {
            visitors.tableReferences.getTableReferences().forEach(table -> stats.addObject_read(table));
        }

        // WHERE predicates of the statement itself
        if (stmt.getWhereClause() != null) {
            visitors.wherePredicates.reset();
            stmt.getWhereClause().acceptChildren(visitors.wherePredicates);
            stats.addWhere_predicate_count(visitors.wherePredicates.getPredicateCount());
        }

        // WHERE predicates of the direct child statements
        if (stmt.getStatements() != null) {
            for (int j = 0; j < stmt.getStatements().size(); j++) {
                TCustomSqlStatement subStmt = stmt.getStatements().get(j);
                if (subStmt == null || subStmt.getWhereClause() == null) {
                    continue;
                }
                visitors.wherePredicates.reset();
                subStmt.getWhereClause().acceptChildren(visitors.wherePredicates);
                stats.addWhere_predicate_count(visitors.wherePredicates.getPredicateCount());
            }
        }
    }

    /**
     * Record the size and line count of a file when it exists on disk; a file that does
     * not exist leaves the statistics untouched. Read failures are logged, not thrown.
     *
     * @param file      file to inspect
     * @param fileStats statistics object to update
     * @param filePath  path used in the warning message
     */
    private void collectSQLFileInfo(File file, FileStatistics fileStats, String filePath) {
        if (!file.exists()) {
            return;
        }

        // Collect file size
        fileStats.setFile_size(file.length());

        // Collect total line count
        try (java.io.BufferedReader reader = new java.io.BufferedReader(new java.io.FileReader(file))) {
            int lineCount = 0;
            while (reader.readLine() != null) {
                lineCount++;
            }
            fileStats.setLine_count(lineCount);
        } catch (java.io.IOException e) {
            logger.warn("Error collecting total line count for file: " + filePath + ", message: " + e.getMessage());
        }
    }

    /**
     * Increment the counter that matches the statement's type and record the names of
     * created tables/views. Statements of other types are not counted here.
     */
    private void countStatementType(FileStatistics fileStats, TCustomSqlStatement stmt) {
        if (stmt instanceof TSelectSqlStatement) {
            fileStats.incrementSelect_count();
        } else if (stmt instanceof TInsertSqlStatement) {
            fileStats.incrementInsert_count();
        } else if (stmt instanceof TUpdateSqlStatement) {
            fileStats.incrementUpdate_count();
        } else if (stmt instanceof TDeleteSqlStatement) {
            fileStats.incrementDelete_count();
        } else if (stmt instanceof TMergeSqlStatement) {
            fileStats.incrementMerge_count();
        } else if (stmt instanceof TCreateTableSqlStatement) {
            TCreateTableSqlStatement createTableStmt = (TCreateTableSqlStatement) stmt;

            // Check whether it is a temporary table
            boolean isTempTable = false;
            if (createTableStmt.getTableKinds() != null) {
                for (ETableKind kind : TEMPORARY_TABLE_KINDS) {
                    if (createTableStmt.getTableKinds().contains(kind)) {
                        isTempTable = true;
                        break;
                    }
                }
            }
            if (isTempTable) {
                fileStats.incrementCreate_temp_table_count();
            } else {
                fileStats.incrementCreate_table_count();
            }

            // Check whether it is CTAS
            if (createTableStmt.getSubQuery() != null) {
                fileStats.incrementCtas_count();
            }

            // Guard against a missing target table instead of failing with a NullPointerException
            if (createTableStmt.getTargetTable() != null) {
                fileStats.addObject_created(createTableStmt.getTargetTable().toString());
            }
        } else if (stmt instanceof TCreateViewSqlStatement) {
            TCreateViewSqlStatement createViewStmt = (TCreateViewSqlStatement) stmt;

            // A null table kind is simply "not temporary"
            if (TEMPORARY_TABLE_KINDS.contains(createViewStmt.getTableKind())) {
                fileStats.incrementCreate_temp_view_count();
            } else {
                fileStats.incrementCreate_view_count();
            }

            // Guard against a missing view name instead of failing with a NullPointerException
            if (createViewStmt.getViewName() != null) {
                fileStats.addObject_created(createViewStmt.getViewName().toString());
            }
        } else if (stmt.sqlstatementtype != null
                && stmt.sqlstatementtype.toString().toLowerCase(Locale.ROOT).startsWith("sstdrop")) {
            // Every statement type whose name starts with "sstdrop" counts as a DROP
            fileStats.incrementDrop_count();
        } else if (stmt instanceof TTruncateStatement) {
            fileStats.incrementTruncate_count();
        }
    }

    /**
     * Count the joins of a statement and all of its nested child statements.
     */
    private void countJoins(FileStatistics fileStats, TCustomSqlStatement stmt) {
        // Use a set to track processed joins to avoid duplicate counting
        Set<TJoin> processedJoins = new HashSet<>();
        processStatementsForJoins(fileStats, stmt, processedJoins);
    }

    /**
     * Recursively process all statements and their child statements to collect join information
     *
     * @param fileStats      statistics object
     * @param stmt           current statement being processed
     * @param processedJoins set of processed joins for de-duplication
     */
    private void processStatementsForJoins(FileStatistics fileStats, TCustomSqlStatement stmt, Set<TJoin> processedJoins) {
        if (stmt == null) {
            return;
        }

        // Process joins for current statement
        TJoinList joins = stmt.getJoins();
        if (joins != null) {
            for (int i = 0; i < joins.size(); i++) {
                TJoin join = joins.getJoin(i);
                // add() returns false when this join was already processed
                if (join == null || !processedJoins.add(join)) {
                    continue;
                }
                TJoinItemList joinItems = join.getJoinItems();
                if (joinItems == null) {
                    continue;
                }
                for (int j = 0; j < joinItems.size(); j++) {
                    TJoinItem joinItem = joinItems.getJoinItem(j);
                    EJoinType joinType = joinItem == null ? null : joinItem.getJoinType();
                    if (joinType != null) {
                        fileStats.incrementJoin_count();
                        fileStats.addJoin_type(classifyJoinType(joinType));
                    }
                }
            }
        }

        // Process child statements
        TStatementList statements = stmt.getStatements();
        if (statements != null && statements.size() > 0) {
            for (TCustomSqlStatement subStmt : statements) {
                processStatementsForJoins(fileStats, subStmt, processedJoins);
            }
        }
    }

    /**
     * Classify a concrete join type into one of the main types "inner", "left", "right",
     * "full" or "cross"; any other type is reported by its lower-cased name.
     */
    private static String classifyJoinType(EJoinType joinType) {
        switch (joinType) {
            case inner:
            case join:
            case natural_inner:
                return "inner";
            case left:
            case leftouter:
            case natural_left:
            case natural_leftouter:
            case leftsemi:
            case leftanti:
                return "left";
            case right:
            case rightouter:
            case natural_right:
            case natural_rightouter:
                return "right";
            case full:
            case fullouter:
            case natural_full:
            case natural_fullouter:
                return "full";
            case cross:
                return "cross";
            default:
                // Locale.ROOT keeps the result independent of the JVM default locale
                return joinType.toString().toLowerCase(Locale.ROOT);
        }
    }

    /** Add the size of the statement's CTE list, if it has one, to the CTE count. */
    private void countCTEs(FileStatistics fileStats, TCustomSqlStatement stmt) {
        if (stmt.getCteList() != null) {
            fileStats.addCte_count(stmt.getCteList().size());
        }
    }

    /**
     * Count set operations: every SELECT whose set operator type is not "none" counts
     * once, and its left and right statements are inspected recursively.
     */
    private void countUnions(FileStatistics fileStats, TCustomSqlStatement stmt) {
        if (stmt instanceof TSelectSqlStatement) {
            TSelectSqlStatement selectStmt = (TSelectSqlStatement) stmt;
            if (selectStmt.getSetOperatorType() != ESetOperatorType.none) {
                // Count this union operation
                fileStats.incrementUnion_count();

                // Recursively process left and right statements
                countUnions(fileStats, selectStmt.getLeftStmt());
                countUnions(fileStats, selectStmt.getRightStmt());
            }
        }
    }

}