Source code

001package gudusoft.gsqlparser.tools;
002
003/**
004 * Function of this class :
005 * 
006 * 1. Take a directory path as input
007 * 2. Read all the sql files in the directory recursively and read the content of the file with .sql extension
008 * 3. Guess the database vendor based on the directory name or file name, if the db vendor is guessed from the directory name, 
009 *    then all the files in the directory should be of the same db vendor,otherwise guess the db vendor from the filename, otherwise use the default db vendor as mysql
010 * 4. Use TGSqlParser.getrawsqlstatements() to get the raw sql statements from the file, and count the number of sql statements in the file
011 * 5. Collect the following information for each sql file:
012 *    - file size in bytes
013 *    - number of sql statements
014 *    - sql statement type of each sql statement and the count of each type, use TCustomSqlStatement.sqlstatementtype to get the sql statement type
015 *    - db vendor
016 * 6. Collect the following information for the entire directory:
017 *    - total number of sql files
018 *    - total number of sql statements
019 *    - total number of bytes
020 *    - total number of sql statements by type
021 *    
022 *    the above information can be grouped by db vendor
023 * 7. Output the information to a csv file
024 * 8. add a new option to save all the sql statements to a file in a single line json format, each sql statement is a json object, 
025 *    and includes the following elements: 
026 *    - db vendor
027 *    - sql statement type
028 *    - sql statement size in bytes
029 *    - unique hash id of this sql statement,when calculating the unique hash id, all whitespaces must be ignored
030 *    - sql statement
031 * 
032 *  the file name is sql_statements.json
033 * 
034 * Sample output:
035 * ======================= Summary Statistics =======================
036 * Total SQL Files: 6529
037 * Total Bytes: 25353796
038 * Total SQL Statements: 54617
039 * 
040 * --- Average Statistics ---
041 * Average File Size (bytes): 3883.259
042 * Average SQL Statements Per File: 8.365
043 * Average Size Per SQL Statement (bytes): 464.211
044 * 
045 * Statistics have been written to c:\prg\tmp\a.csv
046 */
047
048import gudusoft.gsqlparser.EDbVendor;
049import gudusoft.gsqlparser.TCustomSqlStatement;
050import gudusoft.gsqlparser.TGSqlParser;
051import gudusoft.gsqlparser.TStatementList;
052import java.io.*;
053import java.nio.file.Files;
054import java.util.*;
055import java.text.NumberFormat;
056import java.util.Locale;
057import java.security.MessageDigest;
058import java.security.NoSuchAlgorithmException;
059import java.nio.charset.StandardCharsets;
060
061public class CountSql {
062    
063    // Create a formatter for numeric values
064    private final NumberFormat numberFormat = NumberFormat.getNumberInstance(Locale.US);
065    
066    public static void main(String[] args) {
067        if (args.length < 1) {
068            System.out.println("Usage: java gudusoft.gsqlparser.tools.CountSql <directory_path> [output_csv_path] [show_file_details] [export_json] [json_output_dir]");
069            System.out.println("  - show_file_details: Optional. Set to 'true' to include individual file details in the output (default: false)");
070            System.out.println("  - export_json: Optional. Set to 'true' to export all SQL statements as JSON (default: false)");
071            System.out.println("  - json_output_dir: Optional. Directory to save the JSON output file (default: current directory)");
072            return;
073        }
074        
075        String dirPath = args[0];
076        String outputPath = args.length > 1 ? args[1] : "sql_statistics.csv";
077        boolean showFileDetails = args.length > 2 ? Boolean.parseBoolean(args[2]) : false;
078        boolean exportJson = args.length > 3 ? Boolean.parseBoolean(args[3]) : false;
079        String jsonOutputDir = args.length > 4 ? args[4] : ".";
080        
081        CountSql counter = new CountSql();
082        counter.processDirectory(dirPath, outputPath, showFileDetails, exportJson, jsonOutputDir);
083    }
084    
085    /**
086     * Process all SQL files in a directory and its subdirectories
087     * @param dirPath Directory path to process
088     * @param outputPath Path to output CSV file
089     * @param showFileDetails Whether to include file details section in the output
090     * @param exportJson Whether to export all SQL statements as JSON
091     * @param jsonOutputDir Directory to save the JSON output file
092     */
093    public void processDirectory(String dirPath, String outputPath, boolean showFileDetails, boolean exportJson, String jsonOutputDir) {
094        File directory = new File(dirPath);
095        
096        if (!directory.exists() || !directory.isDirectory()) {
097            System.err.println("Error: " + dirPath + " is not a valid directory");
098            return;
099        }
100        
101        // Statistics for the entire directory
102        int totalFiles = 0;
103        long totalBytes = 0;
104        int totalStatements = 0;
105        Map<String, Integer> totalStatementsByType = new HashMap<>();
106        Map<EDbVendor, Map<String, Integer>> statementsByVendor = new HashMap<>();
107        Map<EDbVendor, Integer> filesByVendor = new HashMap<>();
108        Map<EDbVendor, Long> bytesByVendor = new HashMap<>();
109        Map<EDbVendor, Integer> statementsByVendorCount = new HashMap<>();
110        
111        // List to store all SQL statement details if JSON export is requested
112        List<SqlStatementDetail> allStatements = exportJson ? new ArrayList<>() : null;
113        
114        // CSV output preparation
115        try (PrintWriter csvWriter = new PrintWriter(new FileWriter(outputPath))) {
116            // Write headers for file details section only if requested
117            if (showFileDetails) {
118                csvWriter.println("File Path,File Size (bytes),DB Vendor,SQL Statements Count,Statement Types");
119            }
120            
121            // Process files
122            List<File> sqlFiles = findSqlFiles(directory);
123            totalFiles = sqlFiles.size();
124            
125            for (File sqlFile : sqlFiles) {
126                FileStatistics stats = processFile(sqlFile, dirPath, exportJson);
127                totalBytes += stats.size;
128                totalStatements += stats.statementCount;
129                
130                // Update vendor-specific statistics
131                EDbVendor vendor = stats.vendor;
132                filesByVendor.put(vendor, filesByVendor.getOrDefault(vendor, 0) + 1);
133                bytesByVendor.put(vendor, bytesByVendor.getOrDefault(vendor, 0L) + stats.size);
134                statementsByVendorCount.put(vendor, statementsByVendorCount.getOrDefault(vendor, 0) + stats.statementCount);
135                
136                // Update statement types
137                for (Map.Entry<String, Integer> entry : stats.statementTypeCount.entrySet()) {
138                    String type = entry.getKey();
139                    int count = entry.getValue();
140                    
141                    // Update overall counts
142                    totalStatementsByType.put(type, totalStatementsByType.getOrDefault(type, 0) + count);
143                    
144                    // Update vendor-specific counts
145                    Map<String, Integer> vendorTypes = statementsByVendor.getOrDefault(vendor, new HashMap<>());
146                    vendorTypes.put(type, vendorTypes.getOrDefault(type, 0) + count);
147                    statementsByVendor.put(vendor, vendorTypes);
148                }
149                
150                // Collect SQL statements for JSON export if requested
151                if (exportJson && stats.statementDetails != null) {
152                    allStatements.addAll(stats.statementDetails);
153                }
154                
155                // Write file details to CSV with formatted numbers only if requested
156                if (showFileDetails) {
157                    csvWriter.println(sqlFile.getPath() + "," + 
158                                    formatNumber(stats.size) + "," + 
159                                    vendor + "," + 
160                                    formatNumber(stats.statementCount) + "," + 
161                                    formatStatementTypes(stats.statementTypeCount));
162                }
163            }
164            
165            // Write summary section with formatted numbers
166            // If file details were included, add a blank line to separate sections
167            if (showFileDetails) {
168                csvWriter.println();
169            }
170            
171            csvWriter.println("Summary Statistics");
172            csvWriter.println("Total SQL Files," + formatNumber(totalFiles));
173            csvWriter.println("Total Bytes," + formatNumber(totalBytes));
174            csvWriter.println("Total SQL Statements," + formatNumber(totalStatements));
175            
176            // Calculate and add average statistics
177            if (totalFiles > 0) {
178                double avgFileSize = (double) totalBytes / totalFiles;
179                double avgStatementsPerFile = (double) totalStatements / totalFiles;
180                
181                csvWriter.println("Average File Size (bytes)," + formatNumber(avgFileSize));
182                csvWriter.println("Average SQL Statements Per File," + formatNumber(avgStatementsPerFile));
183                
184                // Add average size per SQL statement
185                if (totalStatements > 0) {
186                    double avgSizePerStatement = (double) totalBytes / totalStatements;
187                    csvWriter.println("Average Size Per SQL Statement (bytes)," + formatNumber(avgSizePerStatement));
188                }
189            }
190            
191            // Write statement types summary - sorted by count (descending)
192            csvWriter.println("\nStatement Types Summary");
193            List<Map.Entry<String, Integer>> sortedStatementTypes = new ArrayList<>(totalStatementsByType.entrySet());
194            sortedStatementTypes.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue()));
195            
196            for (Map.Entry<String, Integer> entry : sortedStatementTypes) {
197                csvWriter.println(entry.getKey() + "," + formatNumber(entry.getValue()));
198            }
199            
200            // Write vendor-specific summary - sorted by statement count (descending)
201            csvWriter.println("\nStatistics by DB Vendor");
202            csvWriter.println("DB Vendor,Files,Bytes,Statements");
203            
204            List<EDbVendor> sortedVendors = new ArrayList<>(statementsByVendorCount.keySet());
205            sortedVendors.sort((v1, v2) -> statementsByVendorCount.get(v2).compareTo(statementsByVendorCount.get(v1)));
206            
207            for (EDbVendor vendor : sortedVendors) {
208                csvWriter.println(vendor + "," + 
209                               formatNumber(filesByVendor.get(vendor)) + "," + 
210                               formatNumber(bytesByVendor.get(vendor)) + "," + 
211                               formatNumber(statementsByVendorCount.get(vendor)));
212            }
213            
214            // Write statement types by vendor - vendors and types both sorted by count (descending)
215            csvWriter.println("\nStatement Types by DB Vendor");
216            csvWriter.println("DB Vendor,Statement Type,Count");
217            
218            for (EDbVendor vendor : sortedVendors) {
219                Map<String, Integer> typeCount = statementsByVendor.get(vendor);
220                
221                // Skip if typeCount is null or empty
222                if (typeCount != null && !typeCount.isEmpty()) {
223                    // Sort statement types by count for this vendor
224                    List<Map.Entry<String, Integer>> sortedTypes = new ArrayList<>(typeCount.entrySet());
225                    sortedTypes.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue()));
226                    
227                    for (Map.Entry<String, Integer> typeEntry : sortedTypes) {
228                        csvWriter.println(vendor + "," + 
229                                       typeEntry.getKey() + "," + 
230                                       formatNumber(typeEntry.getValue()));
231                    }
232                } else {
233                    // If no statement types found for this vendor, output a message
234                    csvWriter.println(vendor + ",No specific statement types found,0");
235                }
236            }
237            
238            System.out.println("Statistics have been written to " + outputPath);
239            
240        } catch (IOException e) {
241            System.err.println("Error writing to CSV file: " + e.getMessage());
242        }
243        
244        // Export SQL statements to JSON if requested
245        String jsonFilePath = null;
246        if (exportJson) {
247            if (allStatements != null && !allStatements.isEmpty()) {
248                jsonFilePath = exportStatementsToJson(allStatements, jsonOutputDir);
249                if (jsonFilePath != null) {
250                    System.out.println("SQL statements have been exported to: " + jsonFilePath);
251                }
252            } else {
253                System.out.println("No JSON file created: No SQL statements were collected");
254            }
255        } else {
256            System.out.println("No JSON file created: JSON export not requested");
257        }
258
259        // Print summary statistics to the console - simplified version
260        System.out.println("\n======================= Summary Statistics =======================");
261        System.out.println("Total SQL Files: " + formatNumber(totalFiles));
262        System.out.println("Total Bytes: " + formatNumber(totalBytes));
263        System.out.println("Total SQL Statements: " + formatNumber(totalStatements));
264
265        // Calculate and add average statistics
266        if (totalFiles > 0) {
267            double avgFileSize = (double) totalBytes / totalFiles;
268            double avgStatementsPerFile = (double) totalStatements / totalFiles;
269            
270            System.out.println("\n--- Average Statistics ---");
271            System.out.println("Average File Size (bytes): " + formatNumber(avgFileSize));
272            System.out.println("Average SQL Statements Per File: " + formatNumber(avgStatementsPerFile));
273            
274            // Add average size per SQL statement
275            if (totalStatements > 0) {
276                double avgSizePerStatement = (double) totalBytes / totalStatements;
277                System.out.println("Average Size Per SQL Statement (bytes): " + formatNumber(avgSizePerStatement));
278            }
279        }
280
281        // Print file paths summary
282        System.out.println("\n--- Output Files ---");
283        System.out.println("CSV Statistics: " + new File(outputPath).getAbsolutePath());
284        if (jsonFilePath != null) {
285            System.out.println("JSON Statements: " + jsonFilePath);
286        }
287
288        System.out.println("\nStatistics have been written to " + outputPath);
289    }
290    
291    /**
292     * Export SQL statements to JSON file
293     * @param statements List of SQL statement details
294     * @param outputDir Directory to save the JSON file
295     * @return The full absolute path of the created JSON file, or null if creation failed
296     */
297    private String exportStatementsToJson(List<SqlStatementDetail> statements, String outputDir) {
298        // Create File object for the output directory
299        File outputDirectory = new File(outputDir);
300        
301        // Create the directory if it doesn't exist
302        if (!outputDirectory.exists()) {
303            if (!outputDirectory.mkdirs()) {
304                System.err.println("No JSON file created: Could not create directory: " + outputDir);
305                return null;
306            }
307        }
308        
309        // Ensure the path is a directory
310        if (!outputDirectory.isDirectory()) {
311            System.err.println("No JSON file created: Specified JSON output path is not a directory: " + outputDir);
312            return null;
313        }
314        
315        // Create the full path for the JSON file
316        String jsonFileName = "sql_statements.json";
317        File jsonFile = new File(outputDirectory, jsonFileName);
318        String jsonFilePath = jsonFile.getAbsolutePath(); // Use absolute path for clearer output
319        
320        try (PrintWriter jsonWriter = new PrintWriter(new FileWriter(jsonFile))) {
321            for (SqlStatementDetail stmt : statements) {
322                // Write each statement as a single line JSON object
323                jsonWriter.println(stmt.toJson());
324            }
325            
326            return jsonFilePath;
327            
328        } catch (IOException e) {
329            System.err.println("No JSON file created: Error writing to JSON file: " + e.getMessage());
330            return null;
331        }
332    }
333    
334    /**
335     * Format number without thousands separators
336     * @param number The number to format
337     * @return Formatted string without thousands separators
338     */
339    private String formatNumber(Number number) {
340        NumberFormat formatter = NumberFormat.getNumberInstance(Locale.US);
341        formatter.setGroupingUsed(false); // 禁用分组（不使用千分号）
342        
343        // 对于整数值，不显示小数部分
344        if (number instanceof Integer || number instanceof Long) {
345            formatter.setMaximumFractionDigits(0);
346        }
347        
348        return formatter.format(number);
349    }
350    
351    /**
352     * Format the statement types map as a string for CSV output
353     */
354    private String formatStatementTypes(Map<String, Integer> statementTypes) {
355        StringBuilder sb = new StringBuilder();
356        for (Map.Entry<String, Integer> entry : statementTypes.entrySet()) {
357            if (sb.length() > 0) {
358                sb.append(";");
359            }
360            sb.append(entry.getKey()).append(":").append(formatNumber(entry.getValue()));
361        }
362        return sb.toString();
363    }
364    
365    /**
366     * Find all SQL files in a directory and its subdirectories
367     * @param directory Directory to search
368     * @return List of SQL files
369     */
370    private List<File> findSqlFiles(File directory) {
371        List<File> sqlFiles = new ArrayList<>();
372        findSqlFilesRecursive(directory, sqlFiles);
373        return sqlFiles;
374    }
375    
376    /**
377     * Recursively find SQL files
378     */
379    private void findSqlFilesRecursive(File directory, List<File> sqlFiles) {
380        File[] files = directory.listFiles();
381        if (files != null) {
382            for (File file : files) {
383                if (file.isDirectory()) {
384                    findSqlFilesRecursive(file, sqlFiles);
385                } else if (file.getName().toLowerCase().endsWith(".sql")) {
386                    sqlFiles.add(file);
387                }
388            }
389        }
390    }
391    
392    /**
393     * Process a single SQL file
394     * @param file SQL file to process
395     * @param rootDirPath Root directory path for DB vendor detection
396     * @param collectStatements Whether to collect individual SQL statements
397     * @return Statistics for the file
398     */
399    private FileStatistics processFile(File file, String rootDirPath, boolean collectStatements) {
400        FileStatistics stats = new FileStatistics();
401        stats.size = file.length();
402        
403        // Skip processing if file size is zero
404        if (stats.size == 0) {
405            System.out.println("Skipping empty file: " + file.getPath());
406            return stats;
407        }
408        
409        // Determine DB vendor
410        stats.vendor = guessDbVendor(file, rootDirPath);
411        
412        try {
413            // Read file content
414            String sqlContent = new String(Files.readAllBytes(file.toPath()));
415            
416            // Skip files that are empty or only contain whitespace
417            if (sqlContent.trim().isEmpty()) {
418                System.out.println("Skipping file with only whitespace: " + file.getPath());
419                return stats;
420            }
421            
422            // Parse SQL statements
423            TGSqlParser parser = new TGSqlParser(stats.vendor);
424            parser.sqltext = sqlContent;
425            
426            // Get raw SQL statements
427            int iRet = parser.getrawsqlstatements();
428            if (iRet != 0) {
429                System.err.println("Error parsing SQL statements: " + iRet+" for file "+file.getPath()+" with vendor "+stats.vendor);
430                return stats;
431            }
432            TStatementList statements = parser.sqlstatements;
433            stats.statementCount = statements.size();
434            
435            // Initialize statement details list if collecting statements
436            if (collectStatements) {
437                stats.statementDetails = new ArrayList<>();
438            }
439            
440            // Count statement types and collect statements if requested
441            for (int i = 0; i < statements.size(); i++) {
442                TCustomSqlStatement stmt = statements.get(i);
443                String stmtType = stmt.sqlstatementtype.toString();
444                stats.statementTypeCount.put(stmtType, stats.statementTypeCount.getOrDefault(stmtType, 0) + 1);
445                
446                // Collect statement details if requested
447                if (collectStatements) {
448                    SqlStatementDetail detail = new SqlStatementDetail();
449                    detail.dbVendor = stats.vendor.toString();
450                    detail.statementType = stmtType;
451                    
452                    // Get the SQL statement text
453                    String sqlText = stmt.toString();
454                    detail.sqlStatement = sqlText;
455                    
456                    // Calculate SQL statement size (bytes in UTF-8 encoding)
457                    detail.sqlSize = sqlText.getBytes(StandardCharsets.UTF_8).length;
458                    
459                    // Calculate and set the unique hash ID (ignoring whitespaces)
460                    detail.uniqueHashId = calculateHashId(sqlText);
461                    
462                    stats.statementDetails.add(detail);
463                }
464            }
465            
466        } catch (IOException e) {
467            System.err.println("Error reading file " + file.getPath() + ": " + e.getMessage());
468        }
469        
470        return stats;
471    }
472    
473    /**
474     * Guess the database vendor based on directory name or file name
475     * @param file SQL file
476     * @param rootDirPath Root directory path
477     * @return Database vendor
478     */
479    private EDbVendor guessDbVendor(File file, String rootDirPath) {
480        // Check directory path recursively
481        File currentDir = file.getParentFile();
482        File rootDir = new File(rootDirPath);
483        
484        // Convert both paths to absolute to ensure proper comparison
485        String absoluteRootPath = rootDir.getAbsolutePath();
486        
487        // Traverse up the directory tree until root directory
488        while (currentDir != null && 
489               currentDir.getAbsolutePath().startsWith(absoluteRootPath)) {
490            
491            String dirName = currentDir.getName().toLowerCase();
492            EDbVendor vendorFromDir = vendorFromName(dirName);
493            if (vendorFromDir != null) {
494                return vendorFromDir;
495            }
496            
497            // Move up to parent directory
498            currentDir = currentDir.getParentFile();
499        }
500        
501        // If no vendor found in directory structure, check file name
502        String fileName = file.getName().toLowerCase();
503        EDbVendor vendorFromFile = vendorFromName(fileName);
504        if (vendorFromFile != null) {
505            return vendorFromFile;
506        }
507        
508        // Default to generic
509        return EDbVendor.dbvgeneric;
510    }
511    
512    /**
513     * Determine vendor from a name (directory or file)
514     * Supports a comprehensive list of database vendors
515     * @param name Directory or file name to analyze
516     * @return The identified database vendor or null if not recognized
517     */
518    private EDbVendor vendorFromName(String name) {
519        name = name.toLowerCase();
520        
521        // Oracle
522        if (name.contains("oracle") || name.contains("ora") || name.contains("plsql")) {
523            return EDbVendor.dbvoracle;
524        }
525        // MySQL
526        else if (name.contains("mysql") || name.contains("maria")) {
527            return EDbVendor.dbvmysql;
528        }
529        // PostgreSQL
530        else if (name.contains("postgresql") || name.contains("postgres") || name.contains("pgsql")) {
531            return EDbVendor.dbvpostgresql;
532        }
533        // SQL Server
534        else if (name.contains("sqlserver") || name.contains("mssql") || name.contains("sql_server") || name.contains("tsql")) {
535            return EDbVendor.dbvmssql;
536        }
537        // DB2
538        else if (name.contains("db2")) {
539            return EDbVendor.dbvdb2;
540        }
541        // Sybase
542        else if (name.contains("sybase") || name.contains("ase")) {
543            return EDbVendor.dbvsybase;
544        }
545        // Informix
546        else if (name.contains("informix")) {
547            return EDbVendor.dbvinformix;
548        }
549        // Teradata
550        else if (name.contains("teradata")) {
551            return EDbVendor.dbvteradata;
552        }
553        // Netezza
554        else if (name.contains("netezza")) {
555            return EDbVendor.dbvnetezza;
556        }
557        // Hive
558        else if (name.contains("hive")) {
559            return EDbVendor.dbvhive;
560        }
561        // Greenplum
562        else if (name.contains("greenplum")) {
563            return EDbVendor.dbvgreenplum;
564        }
565        // Redshift
566        else if (name.contains("redshift")) {
567            return EDbVendor.dbvredshift;
568        }
569        // Snowflake
570        else if (name.contains("snowflake")) {
571            return EDbVendor.dbvsnowflake;
572        }
573        // Vertica
574        else if (name.contains("vertica")) {
575            return EDbVendor.dbvvertica;
576        }
577        // Athena
578        else if (name.contains("athena")) {
579            return EDbVendor.dbvathena;
580        }
581        // BigQuery
582        else if (name.contains("bigquery") || name.contains("big_query") || name.contains("big-query")) {
583            return EDbVendor.dbvbigquery;
584        }
585        // Couchbase
586        else if (name.contains("couchbase") || name.contains("n1ql")) {
587            return EDbVendor.dbvcouchbase;
588        }
589        // Databricks
590        else if (name.contains("databricks") || name.contains("delta")) {
591            return EDbVendor.dbvdatabricks;
592        }
593        // DAX
594        else if (name.contains("dax") || name.contains("powerbi")) {
595            return EDbVendor.dbvdax;
596        }
597        // GaussDB
598        else if (name.contains("gaussdb") || name.contains("gauss")) {
599            return EDbVendor.dbvgaussdb;
600        }
601        // HANA
602        else if (name.contains("hana") || name.contains("sap")) {
603            return EDbVendor.dbvhana;
604        }
605        // Impala
606        else if (name.contains("impala")) {
607            return EDbVendor.dbvimpala;
608        }
609        // MDX
610        else if (name.contains("mdx") || name.contains("multidimensional")) {
611            return EDbVendor.dbvmdx;
612        }
613        // ODBC
614        else if (name.contains("odbc")) {
615            return EDbVendor.dbvodbc;
616        }
617        // OpenEdge
618        else if (name.contains("openedge") || name.contains("progress")) {
619            return EDbVendor.dbvopenedge;
620        }
621        // Presto
622        else if (name.contains("presto")) {
623            return EDbVendor.dbvpresto;
624        }
625        // SparkSQL
626        else if (name.contains("sparksql") || name.contains("spark-sql") || name.contains("spark_sql") || name.contains("spark")) {
627            return EDbVendor.dbvsparksql;
628        }
629        // Trino
630        else if (name.contains("trino") || name.contains("starburst")) {
631            return EDbVendor.dbvtrino;
632        }
633        
634        return null;
635    }
636    
637    /**
638     * Calculate unique hash ID for SQL statement ignoring all whitespaces
639     * This implementation uses SHA-256 and will produce consistent results across languages
640     * when implementing the same algorithm.
641     * @param sql SQL statement to hash
642     * @return Unique hash ID as a lowercase hex string
643     */
644    public static String calculateHashId(String sql) {
645        if (sql == null || sql.isEmpty()) {
646            return "";
647        }
648        
649        try {
650            // 1. Normalize the SQL by removing ALL whitespace
651            String noWhitespace = sql.replaceAll("\\s+", "");
652            
653            // 2. Convert to bytes using UTF-8 encoding (consistent across platforms)
654            byte[] sqlBytes = noWhitespace.getBytes(StandardCharsets.UTF_8);
655            
656            // 3. Calculate SHA-256 hash (widely available in all languages)
657            MessageDigest digest = MessageDigest.getInstance("SHA-256");
658            byte[] hashBytes = digest.digest(sqlBytes);
659            
660            // 4. Convert to lowercase hexadecimal (standard format across languages)
661            StringBuilder hexString = new StringBuilder();
662            for (byte b : hashBytes) {
663                String hex = Integer.toHexString(0xff & b);
664                if (hex.length() == 1) {
665                    hexString.append('0'); // Ensure two characters for each byte
666                }
667                hexString.append(hex);
668            }
669            
670            return hexString.toString();
671        } catch (NoSuchAlgorithmException e) {
672            System.err.println("Error calculating hash ID: " + e.getMessage());
673            return "hash_error_" + System.currentTimeMillis();
674        }
675    }
676    
677    /**
678     * Class to hold statistics for a single SQL file
679     */
680    private static class FileStatistics {
681        long size;
682        EDbVendor vendor;
683        int statementCount;
684        Map<String, Integer> statementTypeCount = new HashMap<>();
685        List<SqlStatementDetail> statementDetails; // For JSON export
686    }
687    
688    /**
689     * Class to hold details of a single SQL statement for JSON export
690     */
691    private static class SqlStatementDetail {
692        String dbVendor;
693        String statementType;
694        int sqlSize;         // New field for SQL statement size
695        String sqlStatement;
696        String uniqueHashId; // Field for unique hash ID
697        
698        /**
699         * Convert to JSON format
700         * @return JSON representation of this SQL statement
701         */
702        public String toJson() {
703            // Simple JSON string building (no external library)
704            StringBuilder json = new StringBuilder("{");
705            json.append("\"dbVendor\":\"").append(escapeJson(dbVendor)).append("\",");
706            json.append("\"statementType\":\"").append(escapeJson(statementType)).append("\",");
707            json.append("\"sqlSize\":").append(sqlSize).append(",");
708            json.append("\"uniqueHashId\":\"").append(escapeJson(uniqueHashId)).append("\",");
709            json.append("\"sqlStatement\":\"").append(escapeJson(sqlStatement)).append("\"");
710            json.append("}");
711            return json.toString();
712        }
713        
714        /**
715         * Escape special characters for JSON string
716         * @param str String to escape
717         * @return Escaped string
718         */
719        private String escapeJson(String str) {
720            if (str == null) {
721                return "";
722            }
723            
724            StringBuilder sb = new StringBuilder();
725            for (int i = 0; i < str.length(); i++) {
726                char ch = str.charAt(i);
727                switch (ch) {
728                    case '\"':
729                        sb.append("\\\"");
730                        break;
731                    case '\\':
732                        sb.append("\\\\");
733                        break;
734                    case '\b':
735                        sb.append("\\b");
736                        break;
737                    case '\f':
738                        sb.append("\\f");
739                        break;
740                    case '\n':
741                        sb.append("\\n");
742                        break;
743                    case '\r':
744                        sb.append("\\r");
745                        break;
746                    case '\t':
747                        sb.append("\\t");
748                        break;
749                    default:
750                        sb.append(ch);
751                }
752            }
753            return sb.toString();
754        }
755    }
756}