001package gudusoft.gsqlparser.tools; 002 003/** 004 * Function of this class : 005 * 006 * 1. Take a directory path as input 007 * 2. Read all the sql files in the directory recursively and read the content of the file with .sql extension 008 * 3. Guess the database vendor based on the directory name or file name, if the db vendor is guessed from the directory name, 009 * then all the files in the directory should be of the same db vendor,otherwise guess the db vendor from the filename, otherwise use the default db vendor as mysql 010 * 4. Use TGSqlParser.getrawsqlstatements() to get the raw sql statements from the file, and count the number of sql statements in the file 011 * 5. Collect the following information for each sql file: 012 * - file size in bytes 013 * - number of sql statements 014 * - sql statement type of each sql statement and the count of each type, use TCustomSqlStatement.sqlstatementtype to get the sql statement type 015 * - db vendor 016 * 6. Collect the following information for the entire directory: 017 * - total number of sql files 018 * - total number of sql statements 019 * - total number of bytes 020 * - total number of sql statements by type 021 * 022 * the above information can be grouped by db vendor 023 * 7. Output the information to a csv file 024 * 8. add a new option to save all the sql statements to a file in a single line json format, each sql statement is a json object, 025 * and includes the following elements: 026 * - db vendor 027 * - sql statement type 028 * - sql statement size in bytes 029 * - unique hash id of this sql statement,when calculating the unique hash id, all whitespaces must be ignored 030 * - sql statement 031 * 032 * the file name is sql_statements.json 033 * 034 * Sample output: 035 * ======================= Summary Statistics ======================= 036 * Total SQL Files: 6529 037 * Total Bytes: 25353796 038 * Total SQL Statements: 54617 039 * 040 * --- Average Statistics --- 041 * Average File Size (bytes): 3883.259 042 * Average SQL Statements Per File: 8.365 043 * Average Size Per SQL Statement (bytes): 464.211 044 * 045 * Statistics have been written to c:\prg\tmp\a.csv 046 */ 047 048import gudusoft.gsqlparser.EDbVendor; 049import gudusoft.gsqlparser.TCustomSqlStatement; 050import gudusoft.gsqlparser.TGSqlParser; 051import gudusoft.gsqlparser.TStatementList; 052import java.io.*; 053import java.nio.file.Files; 054import java.util.*; 055import java.text.NumberFormat; 056import java.util.Locale; 057import java.security.MessageDigest; 058import java.security.NoSuchAlgorithmException; 059import java.nio.charset.StandardCharsets; 060 061public class CountSql { 062 063 // Create a formatter for numeric values 064 private final NumberFormat numberFormat = NumberFormat.getNumberInstance(Locale.US); 065 066 public static void main(String[] args) { 067 if (args.length < 1) { 068 System.out.println("Usage: java gudusoft.gsqlparser.tools.CountSql <directory_path> [output_csv_path] [show_file_details] [export_json] [json_output_dir]"); 069 System.out.println(" - show_file_details: Optional. Set to 'true' to include individual file details in the output (default: false)"); 070 System.out.println(" - export_json: Optional. Set to 'true' to export all SQL statements as JSON (default: false)"); 071 System.out.println(" - json_output_dir: Optional. Directory to save the JSON output file (default: current directory)"); 072 return; 073 } 074 075 String dirPath = args[0]; 076 String outputPath = args.length > 1 ? args[1] : "sql_statistics.csv"; 077 boolean showFileDetails = args.length > 2 ? Boolean.parseBoolean(args[2]) : false; 078 boolean exportJson = args.length > 3 ? Boolean.parseBoolean(args[3]) : false; 079 String jsonOutputDir = args.length > 4 ? args[4] : "."; 080 081 CountSql counter = new CountSql(); 082 counter.processDirectory(dirPath, outputPath, showFileDetails, exportJson, jsonOutputDir); 083 } 084 085 /** 086 * Process all SQL files in a directory and its subdirectories 087 * @param dirPath Directory path to process 088 * @param outputPath Path to output CSV file 089 * @param showFileDetails Whether to include file details section in the output 090 * @param exportJson Whether to export all SQL statements as JSON 091 * @param jsonOutputDir Directory to save the JSON output file 092 */ 093 public void processDirectory(String dirPath, String outputPath, boolean showFileDetails, boolean exportJson, String jsonOutputDir) { 094 File directory = new File(dirPath); 095 096 if (!directory.exists() || !directory.isDirectory()) { 097 System.err.println("Error: " + dirPath + " is not a valid directory"); 098 return; 099 } 100 101 // Statistics for the entire directory 102 int totalFiles = 0; 103 long totalBytes = 0; 104 int totalStatements = 0; 105 Map<String, Integer> totalStatementsByType = new HashMap<>(); 106 Map<EDbVendor, Map<String, Integer>> statementsByVendor = new HashMap<>(); 107 Map<EDbVendor, Integer> filesByVendor = new HashMap<>(); 108 Map<EDbVendor, Long> bytesByVendor = new HashMap<>(); 109 Map<EDbVendor, Integer> statementsByVendorCount = new HashMap<>(); 110 111 // List to store all SQL statement details if JSON export is requested 112 List<SqlStatementDetail> allStatements = exportJson ? new ArrayList<>() : null; 113 114 // CSV output preparation 115 try (PrintWriter csvWriter = new PrintWriter(new FileWriter(outputPath))) { 116 // Write headers for file details section only if requested 117 if (showFileDetails) { 118 csvWriter.println("File Path,File Size (bytes),DB Vendor,SQL Statements Count,Statement Types"); 119 } 120 121 // Process files 122 List<File> sqlFiles = findSqlFiles(directory); 123 totalFiles = sqlFiles.size(); 124 125 for (File sqlFile : sqlFiles) { 126 FileStatistics stats = processFile(sqlFile, dirPath, exportJson); 127 totalBytes += stats.size; 128 totalStatements += stats.statementCount; 129 130 // Update vendor-specific statistics 131 EDbVendor vendor = stats.vendor; 132 filesByVendor.put(vendor, filesByVendor.getOrDefault(vendor, 0) + 1); 133 bytesByVendor.put(vendor, bytesByVendor.getOrDefault(vendor, 0L) + stats.size); 134 statementsByVendorCount.put(vendor, statementsByVendorCount.getOrDefault(vendor, 0) + stats.statementCount); 135 136 // Update statement types 137 for (Map.Entry<String, Integer> entry : stats.statementTypeCount.entrySet()) { 138 String type = entry.getKey(); 139 int count = entry.getValue(); 140 141 // Update overall counts 142 totalStatementsByType.put(type, totalStatementsByType.getOrDefault(type, 0) + count); 143 144 // Update vendor-specific counts 145 Map<String, Integer> vendorTypes = statementsByVendor.getOrDefault(vendor, new HashMap<>()); 146 vendorTypes.put(type, vendorTypes.getOrDefault(type, 0) + count); 147 statementsByVendor.put(vendor, vendorTypes); 148 } 149 150 // Collect SQL statements for JSON export if requested 151 if (exportJson && stats.statementDetails != null) { 152 allStatements.addAll(stats.statementDetails); 153 } 154 155 // Write file details to CSV with formatted numbers only if requested 156 if (showFileDetails) { 157 csvWriter.println(sqlFile.getPath() + "," + 158 formatNumber(stats.size) + "," + 159 vendor + "," + 160 formatNumber(stats.statementCount) + "," + 161 formatStatementTypes(stats.statementTypeCount)); 162 } 163 } 164 165 // Write summary section with formatted numbers 166 // If file details were included, add a blank line to separate sections 167 if (showFileDetails) { 168 csvWriter.println(); 169 } 170 171 csvWriter.println("Summary Statistics"); 172 csvWriter.println("Total SQL Files," + formatNumber(totalFiles)); 173 csvWriter.println("Total Bytes," + formatNumber(totalBytes)); 174 csvWriter.println("Total SQL Statements," + formatNumber(totalStatements)); 175 176 // Calculate and add average statistics 177 if (totalFiles > 0) { 178 double avgFileSize = (double) totalBytes / totalFiles; 179 double avgStatementsPerFile = (double) totalStatements / totalFiles; 180 181 csvWriter.println("Average File Size (bytes)," + formatNumber(avgFileSize)); 182 csvWriter.println("Average SQL Statements Per File," + formatNumber(avgStatementsPerFile)); 183 184 // Add average size per SQL statement 185 if (totalStatements > 0) { 186 double avgSizePerStatement = (double) totalBytes / totalStatements; 187 csvWriter.println("Average Size Per SQL Statement (bytes)," + formatNumber(avgSizePerStatement)); 188 } 189 } 190 191 // Write statement types summary - sorted by count (descending) 192 csvWriter.println("\nStatement Types Summary"); 193 List<Map.Entry<String, Integer>> sortedStatementTypes = new ArrayList<>(totalStatementsByType.entrySet()); 194 sortedStatementTypes.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue())); 195 196 for (Map.Entry<String, Integer> entry : sortedStatementTypes) { 197 csvWriter.println(entry.getKey() + "," + formatNumber(entry.getValue())); 198 } 199 200 // Write vendor-specific summary - sorted by statement count (descending) 201 csvWriter.println("\nStatistics by DB Vendor"); 202 csvWriter.println("DB Vendor,Files,Bytes,Statements"); 203 204 List<EDbVendor> sortedVendors = new ArrayList<>(statementsByVendorCount.keySet()); 205 sortedVendors.sort((v1, v2) -> statementsByVendorCount.get(v2).compareTo(statementsByVendorCount.get(v1))); 206 207 for (EDbVendor vendor : sortedVendors) { 208 csvWriter.println(vendor + "," + 209 formatNumber(filesByVendor.get(vendor)) + "," + 210 formatNumber(bytesByVendor.get(vendor)) + "," + 211 formatNumber(statementsByVendorCount.get(vendor))); 212 } 213 214 // Write statement types by vendor - vendors and types both sorted by count (descending) 215 csvWriter.println("\nStatement Types by DB Vendor"); 216 csvWriter.println("DB Vendor,Statement Type,Count"); 217 218 for (EDbVendor vendor : sortedVendors) { 219 Map<String, Integer> typeCount = statementsByVendor.get(vendor); 220 221 // Skip if typeCount is null or empty 222 if (typeCount != null && !typeCount.isEmpty()) { 223 // Sort statement types by count for this vendor 224 List<Map.Entry<String, Integer>> sortedTypes = new ArrayList<>(typeCount.entrySet()); 225 sortedTypes.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue())); 226 227 for (Map.Entry<String, Integer> typeEntry : sortedTypes) { 228 csvWriter.println(vendor + "," + 229 typeEntry.getKey() + "," + 230 formatNumber(typeEntry.getValue())); 231 } 232 } else { 233 // If no statement types found for this vendor, output a message 234 csvWriter.println(vendor + ",No specific statement types found,0"); 235 } 236 } 237 238 System.out.println("Statistics have been written to " + outputPath); 239 240 } catch (IOException e) { 241 System.err.println("Error writing to CSV file: " + e.getMessage()); 242 } 243 244 // Export SQL statements to JSON if requested 245 String jsonFilePath = null; 246 if (exportJson) { 247 if (allStatements != null && !allStatements.isEmpty()) { 248 jsonFilePath = exportStatementsToJson(allStatements, jsonOutputDir); 249 if (jsonFilePath != null) { 250 System.out.println("SQL statements have been exported to: " + jsonFilePath); 251 } 252 } else { 253 System.out.println("No JSON file created: No SQL statements were collected"); 254 } 255 } else { 256 System.out.println("No JSON file created: JSON export not requested"); 257 } 258 259 // Print summary statistics to the console - simplified version 260 System.out.println("\n======================= Summary Statistics ======================="); 261 System.out.println("Total SQL Files: " + formatNumber(totalFiles)); 262 System.out.println("Total Bytes: " + formatNumber(totalBytes)); 263 System.out.println("Total SQL Statements: " + formatNumber(totalStatements)); 264 265 // Calculate and add average statistics 266 if (totalFiles > 0) { 267 double avgFileSize = (double) totalBytes / totalFiles; 268 double avgStatementsPerFile = (double) totalStatements / totalFiles; 269 270 System.out.println("\n--- Average Statistics ---"); 271 System.out.println("Average File Size (bytes): " + formatNumber(avgFileSize)); 272 System.out.println("Average SQL Statements Per File: " + formatNumber(avgStatementsPerFile)); 273 274 // Add average size per SQL statement 275 if (totalStatements > 0) { 276 double avgSizePerStatement = (double) totalBytes / totalStatements; 277 System.out.println("Average Size Per SQL Statement (bytes): " + formatNumber(avgSizePerStatement)); 278 } 279 } 280 281 // Print file paths summary 282 System.out.println("\n--- Output Files ---"); 283 System.out.println("CSV Statistics: " + new File(outputPath).getAbsolutePath()); 284 if (jsonFilePath != null) { 285 System.out.println("JSON Statements: " + jsonFilePath); 286 } 287 288 System.out.println("\nStatistics have been written to " + outputPath); 289 } 290 291 /** 292 * Export SQL statements to JSON file 293 * @param statements List of SQL statement details 294 * @param outputDir Directory to save the JSON file 295 * @return The full absolute path of the created JSON file, or null if creation failed 296 */ 297 private String exportStatementsToJson(List<SqlStatementDetail> statements, String outputDir) { 298 // Create File object for the output directory 299 File outputDirectory = new File(outputDir); 300 301 // Create the directory if it doesn't exist 302 if (!outputDirectory.exists()) { 303 if (!outputDirectory.mkdirs()) { 304 System.err.println("No JSON file created: Could not create directory: " + outputDir); 305 return null; 306 } 307 } 308 309 // Ensure the path is a directory 310 if (!outputDirectory.isDirectory()) { 311 System.err.println("No JSON file created: Specified JSON output path is not a directory: " + outputDir); 312 return null; 313 } 314 315 // Create the full path for the JSON file 316 String jsonFileName = "sql_statements.json"; 317 File jsonFile = new File(outputDirectory, jsonFileName); 318 String jsonFilePath = jsonFile.getAbsolutePath(); // Use absolute path for clearer output 319 320 try (PrintWriter jsonWriter = new PrintWriter(new FileWriter(jsonFile))) { 321 for (SqlStatementDetail stmt : statements) { 322 // Write each statement as a single line JSON object 323 jsonWriter.println(stmt.toJson()); 324 } 325 326 return jsonFilePath; 327 328 } catch (IOException e) { 329 System.err.println("No JSON file created: Error writing to JSON file: " + e.getMessage()); 330 return null; 331 } 332 } 333 334 /** 335 * Format number without thousands separators 336 * @param number The number to format 337 * @return Formatted string without thousands separators 338 */ 339 private String formatNumber(Number number) { 340 NumberFormat formatter = NumberFormat.getNumberInstance(Locale.US); 341 formatter.setGroupingUsed(false); // 禁用分组(不使用千分号) 342 343 // 对于整数值,不显示小数部分 344 if (number instanceof Integer || number instanceof Long) { 345 formatter.setMaximumFractionDigits(0); 346 } 347 348 return formatter.format(number); 349 } 350 351 /** 352 * Format the statement types map as a string for CSV output 353 */ 354 private String formatStatementTypes(Map<String, Integer> statementTypes) { 355 StringBuilder sb = new StringBuilder(); 356 for (Map.Entry<String, Integer> entry : statementTypes.entrySet()) { 357 if (sb.length() > 0) { 358 sb.append(";"); 359 } 360 sb.append(entry.getKey()).append(":").append(formatNumber(entry.getValue())); 361 } 362 return sb.toString(); 363 } 364 365 /** 366 * Find all SQL files in a directory and its subdirectories 367 * @param directory Directory to search 368 * @return List of SQL files 369 */ 370 private List<File> findSqlFiles(File directory) { 371 List<File> sqlFiles = new ArrayList<>(); 372 findSqlFilesRecursive(directory, sqlFiles); 373 return sqlFiles; 374 } 375 376 /** 377 * Recursively find SQL files 378 */ 379 private void findSqlFilesRecursive(File directory, List<File> sqlFiles) { 380 File[] files = directory.listFiles(); 381 if (files != null) { 382 for (File file : files) { 383 if (file.isDirectory()) { 384 findSqlFilesRecursive(file, sqlFiles); 385 } else if (file.getName().toLowerCase().endsWith(".sql")) { 386 sqlFiles.add(file); 387 } 388 } 389 } 390 } 391 392 /** 393 * Process a single SQL file 394 * @param file SQL file to process 395 * @param rootDirPath Root directory path for DB vendor detection 396 * @param collectStatements Whether to collect individual SQL statements 397 * @return Statistics for the file 398 */ 399 private FileStatistics processFile(File file, String rootDirPath, boolean collectStatements) { 400 FileStatistics stats = new FileStatistics(); 401 stats.size = file.length(); 402 403 // Skip processing if file size is zero 404 if (stats.size == 0) { 405 System.out.println("Skipping empty file: " + file.getPath()); 406 return stats; 407 } 408 409 // Determine DB vendor 410 stats.vendor = guessDbVendor(file, rootDirPath); 411 412 try { 413 // Read file content 414 String sqlContent = new String(Files.readAllBytes(file.toPath())); 415 416 // Skip files that are empty or only contain whitespace 417 if (sqlContent.trim().isEmpty()) { 418 System.out.println("Skipping file with only whitespace: " + file.getPath()); 419 return stats; 420 } 421 422 // Parse SQL statements 423 TGSqlParser parser = new TGSqlParser(stats.vendor); 424 parser.sqltext = sqlContent; 425 426 // Get raw SQL statements 427 int iRet = parser.getrawsqlstatements(); 428 if (iRet != 0) { 429 System.err.println("Error parsing SQL statements: " + iRet+" for file "+file.getPath()+" with vendor "+stats.vendor); 430 return stats; 431 } 432 TStatementList statements = parser.sqlstatements; 433 stats.statementCount = statements.size(); 434 435 // Initialize statement details list if collecting statements 436 if (collectStatements) { 437 stats.statementDetails = new ArrayList<>(); 438 } 439 440 // Count statement types and collect statements if requested 441 for (int i = 0; i < statements.size(); i++) { 442 TCustomSqlStatement stmt = statements.get(i); 443 String stmtType = stmt.sqlstatementtype.toString(); 444 stats.statementTypeCount.put(stmtType, stats.statementTypeCount.getOrDefault(stmtType, 0) + 1); 445 446 // Collect statement details if requested 447 if (collectStatements) { 448 SqlStatementDetail detail = new SqlStatementDetail(); 449 detail.dbVendor = stats.vendor.toString(); 450 detail.statementType = stmtType; 451 452 // Get the SQL statement text 453 String sqlText = stmt.toString(); 454 detail.sqlStatement = sqlText; 455 456 // Calculate SQL statement size (bytes in UTF-8 encoding) 457 detail.sqlSize = sqlText.getBytes(StandardCharsets.UTF_8).length; 458 459 // Calculate and set the unique hash ID (ignoring whitespaces) 460 detail.uniqueHashId = calculateHashId(sqlText); 461 462 stats.statementDetails.add(detail); 463 } 464 } 465 466 } catch (IOException e) { 467 System.err.println("Error reading file " + file.getPath() + ": " + e.getMessage()); 468 } 469 470 return stats; 471 } 472 473 /** 474 * Guess the database vendor based on directory name or file name 475 * @param file SQL file 476 * @param rootDirPath Root directory path 477 * @return Database vendor 478 */ 479 private EDbVendor guessDbVendor(File file, String rootDirPath) { 480 // Check directory path recursively 481 File currentDir = file.getParentFile(); 482 File rootDir = new File(rootDirPath); 483 484 // Convert both paths to absolute to ensure proper comparison 485 String absoluteRootPath = rootDir.getAbsolutePath(); 486 487 // Traverse up the directory tree until root directory 488 while (currentDir != null && 489 currentDir.getAbsolutePath().startsWith(absoluteRootPath)) { 490 491 String dirName = currentDir.getName().toLowerCase(); 492 EDbVendor vendorFromDir = vendorFromName(dirName); 493 if (vendorFromDir != null) { 494 return vendorFromDir; 495 } 496 497 // Move up to parent directory 498 currentDir = currentDir.getParentFile(); 499 } 500 501 // If no vendor found in directory structure, check file name 502 String fileName = file.getName().toLowerCase(); 503 EDbVendor vendorFromFile = vendorFromName(fileName); 504 if (vendorFromFile != null) { 505 return vendorFromFile; 506 } 507 508 // Default to generic 509 return EDbVendor.dbvgeneric; 510 } 511 512 /** 513 * Determine vendor from a name (directory or file) 514 * Supports a comprehensive list of database vendors 515 * @param name Directory or file name to analyze 516 * @return The identified database vendor or null if not recognized 517 */ 518 private EDbVendor vendorFromName(String name) { 519 name = name.toLowerCase(); 520 521 // Oracle 522 if (name.contains("oracle") || name.contains("ora") || name.contains("plsql")) { 523 return EDbVendor.dbvoracle; 524 } 525 // MySQL 526 else if (name.contains("mysql") || name.contains("maria")) { 527 return EDbVendor.dbvmysql; 528 } 529 // PostgreSQL 530 else if (name.contains("postgresql") || name.contains("postgres") || name.contains("pgsql")) { 531 return EDbVendor.dbvpostgresql; 532 } 533 // SQL Server 534 else if (name.contains("sqlserver") || name.contains("mssql") || name.contains("sql_server") || name.contains("tsql")) { 535 return EDbVendor.dbvmssql; 536 } 537 // DB2 538 else if (name.contains("db2")) { 539 return EDbVendor.dbvdb2; 540 } 541 // Sybase 542 else if (name.contains("sybase") || name.contains("ase")) { 543 return EDbVendor.dbvsybase; 544 } 545 // Informix 546 else if (name.contains("informix")) { 547 return EDbVendor.dbvinformix; 548 } 549 // Teradata 550 else if (name.contains("teradata")) { 551 return EDbVendor.dbvteradata; 552 } 553 // Netezza 554 else if (name.contains("netezza")) { 555 return EDbVendor.dbvnetezza; 556 } 557 // Hive 558 else if (name.contains("hive")) { 559 return EDbVendor.dbvhive; 560 } 561 // Greenplum 562 else if (name.contains("greenplum")) { 563 return EDbVendor.dbvgreenplum; 564 } 565 // Redshift 566 else if (name.contains("redshift")) { 567 return EDbVendor.dbvredshift; 568 } 569 // Snowflake 570 else if (name.contains("snowflake")) { 571 return EDbVendor.dbvsnowflake; 572 } 573 // Vertica 574 else if (name.contains("vertica")) { 575 return EDbVendor.dbvvertica; 576 } 577 // Athena 578 else if (name.contains("athena")) { 579 return EDbVendor.dbvathena; 580 } 581 // BigQuery 582 else if (name.contains("bigquery") || name.contains("big_query") || name.contains("big-query")) { 583 return EDbVendor.dbvbigquery; 584 } 585 // Couchbase 586 else if (name.contains("couchbase") || name.contains("n1ql")) { 587 return EDbVendor.dbvcouchbase; 588 } 589 // Databricks 590 else if (name.contains("databricks") || name.contains("delta")) { 591 return EDbVendor.dbvdatabricks; 592 } 593 // DAX 594 else if (name.contains("dax") || name.contains("powerbi")) { 595 return EDbVendor.dbvdax; 596 } 597 // GaussDB 598 else if (name.contains("gaussdb") || name.contains("gauss")) { 599 return EDbVendor.dbvgaussdb; 600 } 601 // HANA 602 else if (name.contains("hana") || name.contains("sap")) { 603 return EDbVendor.dbvhana; 604 } 605 // Impala 606 else if (name.contains("impala")) { 607 return EDbVendor.dbvimpala; 608 } 609 // MDX 610 else if (name.contains("mdx") || name.contains("multidimensional")) { 611 return EDbVendor.dbvmdx; 612 } 613 // ODBC 614 else if (name.contains("odbc")) { 615 return EDbVendor.dbvodbc; 616 } 617 // OpenEdge 618 else if (name.contains("openedge") || name.contains("progress")) { 619 return EDbVendor.dbvopenedge; 620 } 621 // Presto 622 else if (name.contains("presto")) { 623 return EDbVendor.dbvpresto; 624 } 625 // SparkSQL 626 else if (name.contains("sparksql") || name.contains("spark-sql") || name.contains("spark_sql") || name.contains("spark")) { 627 return EDbVendor.dbvsparksql; 628 } 629 // Trino 630 else if (name.contains("trino") || name.contains("starburst")) { 631 return EDbVendor.dbvtrino; 632 } 633 634 return null; 635 } 636 637 /** 638 * Calculate unique hash ID for SQL statement ignoring all whitespaces 639 * This implementation uses SHA-256 and will produce consistent results across languages 640 * when implementing the same algorithm. 641 * @param sql SQL statement to hash 642 * @return Unique hash ID as a lowercase hex string 643 */ 644 public static String calculateHashId(String sql) { 645 if (sql == null || sql.isEmpty()) { 646 return ""; 647 } 648 649 try { 650 // 1. Normalize the SQL by removing ALL whitespace 651 String noWhitespace = sql.replaceAll("\\s+", ""); 652 653 // 2. Convert to bytes using UTF-8 encoding (consistent across platforms) 654 byte[] sqlBytes = noWhitespace.getBytes(StandardCharsets.UTF_8); 655 656 // 3. Calculate SHA-256 hash (widely available in all languages) 657 MessageDigest digest = MessageDigest.getInstance("SHA-256"); 658 byte[] hashBytes = digest.digest(sqlBytes); 659 660 // 4. Convert to lowercase hexadecimal (standard format across languages) 661 StringBuilder hexString = new StringBuilder(); 662 for (byte b : hashBytes) { 663 String hex = Integer.toHexString(0xff & b); 664 if (hex.length() == 1) { 665 hexString.append('0'); // Ensure two characters for each byte 666 } 667 hexString.append(hex); 668 } 669 670 return hexString.toString(); 671 } catch (NoSuchAlgorithmException e) { 672 System.err.println("Error calculating hash ID: " + e.getMessage()); 673 return "hash_error_" + System.currentTimeMillis(); 674 } 675 } 676 677 /** 678 * Class to hold statistics for a single SQL file 679 */ 680 private static class FileStatistics { 681 long size; 682 EDbVendor vendor; 683 int statementCount; 684 Map<String, Integer> statementTypeCount = new HashMap<>(); 685 List<SqlStatementDetail> statementDetails; // For JSON export 686 } 687 688 /** 689 * Class to hold details of a single SQL statement for JSON export 690 */ 691 private static class SqlStatementDetail { 692 String dbVendor; 693 String statementType; 694 int sqlSize; // New field for SQL statement size 695 String sqlStatement; 696 String uniqueHashId; // Field for unique hash ID 697 698 /** 699 * Convert to JSON format 700 * @return JSON representation of this SQL statement 701 */ 702 public String toJson() { 703 // Simple JSON string building (no external library) 704 StringBuilder json = new StringBuilder("{"); 705 json.append("\"dbVendor\":\"").append(escapeJson(dbVendor)).append("\","); 706 json.append("\"statementType\":\"").append(escapeJson(statementType)).append("\","); 707 json.append("\"sqlSize\":").append(sqlSize).append(","); 708 json.append("\"uniqueHashId\":\"").append(escapeJson(uniqueHashId)).append("\","); 709 json.append("\"sqlStatement\":\"").append(escapeJson(sqlStatement)).append("\""); 710 json.append("}"); 711 return json.toString(); 712 } 713 714 /** 715 * Escape special characters for JSON string 716 * @param str String to escape 717 * @return Escaped string 718 */ 719 private String escapeJson(String str) { 720 if (str == null) { 721 return ""; 722 } 723 724 StringBuilder sb = new StringBuilder(); 725 for (int i = 0; i < str.length(); i++) { 726 char ch = str.charAt(i); 727 switch (ch) { 728 case '\"': 729 sb.append("\\\""); 730 break; 731 case '\\': 732 sb.append("\\\\"); 733 break; 734 case '\b': 735 sb.append("\\b"); 736 break; 737 case '\f': 738 sb.append("\\f"); 739 break; 740 case '\n': 741 sb.append("\\n"); 742 break; 743 case '\r': 744 sb.append("\\r"); 745 break; 746 case '\t': 747 sb.append("\\t"); 748 break; 749 default: 750 sb.append(ch); 751 } 752 } 753 return sb.toString(); 754 } 755 } 756}