package gudusoft.gsqlparser.tools;

import java.io.*;
import java.util.*;
import java.util.stream.Collectors;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * 1. Take a JSON file as input.
 * 2. Read the JSON file created by CountSql.java, where each line is a record with this structure:
 *    {"dbVendor":"dbvathena","statementType":"sstselect","sqlSize":156,"uniqueHashId":"0204e249ff1a6ae3f49db491e9709f13762314e95080e988d3ef99fe6b9c1773","sqlStatement":"WITH\r\ndataset AS (\r\n SELECT ARRAY [1,2,2,3,3,4,5] AS items\r\n)\r\nSELECT array_agg(distinct i) AS array_items\r\nFROM dataset\r\nCROSS JOIN UNNEST(items) AS t(i);"}
 * 3. Find SQL statements that share the same uniqueHashId, which means the statement is a duplicate.
 * 4. Count the duplicate SQL statements under each uniqueHashId; only list hash IDs whose count is greater than 1.
 * 5. List the uniqueHashId, statementType and sqlStatement of the duplicate SQL statements, ordered by duplicate count from high to low.
 * 6. Output the result to a CSV file named duplicate_sql_statements.csv.
 *
 * Sample output:
 * Total JSON records processed: 54617
 * Analysis complete. Results written to:
 * - Summary CSV: c:\prg\tmp\o.csv
 * - Detailed CSV with SQL examples: c:\prg\tmp\o_with_sql.csv
 * Total SQL statements processed: 54617
 * Unique SQL statement types: 24669
 * Found 9257 duplicate statement types.
 * Total duplicate statements: 29948
 *
 */
public class ManageSqlInJson {

    // Class to hold SQL statement metadata from JSON
    private static class SqlStatementInfo {
        String uniqueHashId;
        String statementType;
        String dbVendor;
        String sqlStatement;
        int count = 1;

        public SqlStatementInfo(String uniqueHashId, String statementType, String dbVendor, String sqlStatement) {
            this.uniqueHashId = uniqueHashId;
            this.statementType = statementType;
            this.dbVendor = dbVendor;
            this.sqlStatement = sqlStatement;
        }
    }

    public static void main(String[] args) {
        if (args.length < 1) {
            System.out.println("Usage: java ManageSqlInJson <json_file_path> [output_csv_path]");
            return;
        }

        String jsonFilePath = args[0];
        String outputCsvPath = args.length > 1 ? args[1] : "duplicate_sql_statements.csv";

        try {
            // Process the JSON file and identify duplicates
            Map<String, SqlStatementInfo> statementMap = findDuplicateStatements(jsonFilePath);

            // Export results to CSV
            exportDuplicatesToCsv(statementMap, outputCsvPath);

        } catch (IOException e) {
            System.err.println("Error processing file: " + e.getMessage());
        }
    }

    /**
     * Find duplicate SQL statements in the JSON file
     * @param jsonFilePath Path to JSON file
     * @return Map of statement hash IDs to statement information
     * @throws IOException If there's an error reading the file
     */
    private static Map<String, SqlStatementInfo> findDuplicateStatements(String jsonFilePath) throws IOException {
        Map<String, SqlStatementInfo> statementMap = new HashMap<>();
        int totalProcessed = 0;

        // Read the JSON file line by line
        try (BufferedReader reader = Files.newBufferedReader(Paths.get(jsonFilePath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }

                totalProcessed++;

                // Extract information from JSON
                String uniqueHashId = extractJsonField(line, "uniqueHashId");
                String statementType = extractJsonField(line, "statementType");
                String dbVendor = extractJsonField(line, "dbVendor");
                String sqlStatement = extractJsonField(line, "sqlStatement");

                if (uniqueHashId == null || statementType == null || dbVendor == null || sqlStatement == null) {
                    System.err.println("Warning: Invalid JSON line: " + line);
                    continue;
                }

                // Count occurrences of each hash ID
                if (statementMap.containsKey(uniqueHashId)) {
                    statementMap.get(uniqueHashId).count++;
                    // We already have a sample statement, no need to store this one
                } else {
                    statementMap.put(uniqueHashId, new SqlStatementInfo(uniqueHashId, statementType, dbVendor, sqlStatement));
                }
            }
        }

        System.out.println("Total JSON records processed: " + totalProcessed);
        return statementMap;
    }

    /**
     * Extract a field value from a JSON string
     * @param json JSON string
     * @param fieldName Name of the field to extract
     * @return Value of the field, or null if not found
     */
    private static String extractJsonField(String json, String fieldName) {
        String fieldPattern = "\"" + fieldName + "\":\"";
        int startIndex = json.indexOf(fieldPattern);

        if (startIndex == -1) {
            // Handle numeric fields (no quotes)
            if (fieldName.equals("sqlSize")) {
                fieldPattern = "\"" + fieldName + "\":";
                startIndex = json.indexOf(fieldPattern);
                if (startIndex == -1) {
                    return null;
                }

                startIndex += fieldPattern.length();
                int endIndex = json.indexOf(",", startIndex);
                if (endIndex == -1) {
                    endIndex = json.indexOf("}", startIndex);
                }

                if (endIndex == -1) {
                    return null;
                }

                return json.substring(startIndex, endIndex).trim();
            }
            return null;
        }

        startIndex += fieldPattern.length();

        // For the SQL statement field, we need to handle escaped quotes
        if (fieldName.equals("sqlStatement")) {
            // Find the closing quote, handling escaped quotes
            StringBuilder value = new StringBuilder();
            boolean escaped = false;
            for (int i = startIndex; i < json.length(); i++) {
                char c = json.charAt(i);

                if (escaped) {
                    value.append(c);
                    escaped = false;
                } else if (c == '\\') {
                    value.append(c);
                    escaped = true;
                } else if (c == '"') {
                    break; // End of the string value
                } else {
                    value.append(c);
                }
            }
            return value.toString();
        } else {
            // Simple string field
            int endIndex = json.indexOf("\"", startIndex);

            if (endIndex == -1) {
                return null;
            }

            return json.substring(startIndex, endIndex);
        }
    }

    /**
     * Export duplicate SQL statements to CSV
     * @param statementMap Map of statement hash IDs to statement information
     * @param outputPath Path to output CSV file
     * @throws IOException If there's an error writing the file
     */
    private static void exportDuplicatesToCsv(Map<String, SqlStatementInfo> statementMap, String outputPath) throws IOException {
        // Filter for duplicates (count > 1) and sort by count (high to low)
        List<SqlStatementInfo> duplicates = statementMap.values().stream()
                .filter(info -> info.count > 1)
                .sorted((a, b) -> Integer.compare(b.count, a.count))
                .collect(Collectors.toList());

        // Calculate statistics
        int totalStatements = statementMap.values().stream().mapToInt(info -> info.count).sum();
        int totalDuplicates = duplicates.stream().mapToInt(info -> info.count).sum() - duplicates.size();
        int uniqueStatements = statementMap.size(); // distinct uniqueHashId values, matching the console summary below

        // Create a separate file for the detailed SQL output
        String sqlOutputPath = outputPath.replace(".csv", "_with_sql.csv");

        // Write to CSV
        try (PrintWriter writer = new PrintWriter(new FileWriter(outputPath));
             PrintWriter sqlWriter = new PrintWriter(new FileWriter(sqlOutputPath))) {

            // Write statistics header to both files
            for (PrintWriter w : new PrintWriter[]{writer, sqlWriter}) {
                w.println("Summary Statistics");
                w.println("Total SQL Statements Processed," + totalStatements);
                w.println("Unique SQL Statement Types," + uniqueStatements);
                w.println("Duplicate SQL Statement Types," + duplicates.size());
                w.println("Total Duplicate Statements," + totalDuplicates);
                w.println();
            }

            // Header for duplicate details (regular CSV)
            writer.println("Duplicate SQL Statements Details");
            writer.println("UniqueHashId,StatementType,DbVendor,DuplicateCount");

            // Header for detailed output with SQL (separate file)
            sqlWriter.println("Duplicate SQL Statements Details (With SQL Examples)");
            sqlWriter.println("UniqueHashId,StatementType,DbVendor,DuplicateCount,Example SQL Statement");

            // Data rows
            for (SqlStatementInfo info : duplicates) {
                // Regular CSV (without SQL)
                writer.println(String.format("%s,%s,%s,%d",
                        info.uniqueHashId,
                        info.statementType,
                        info.dbVendor,
                        info.count));

                // Detailed CSV (with SQL)
                sqlWriter.println(String.format("%s,%s,%s,%d,\"%s\"",
                        info.uniqueHashId,
                        info.statementType,
                        info.dbVendor,
                        info.count,
                        escapeForCsv(info.sqlStatement)));
            }

            // Print processing summary to console
            System.out.println("Analysis complete. Results written to:");
            System.out.println("- Summary CSV: " + outputPath);
            System.out.println("- Detailed CSV with SQL examples: " + sqlOutputPath);
            System.out.println("Total SQL statements processed: " + totalStatements);
            System.out.println("Unique SQL statement types: " + statementMap.size());
            System.out.println("Found " + duplicates.size() + " duplicate statement types.");
            System.out.println("Total duplicate statements: " + totalDuplicates);
        }
    }

    /**
     * Escape a string for CSV output
     * @param value The string to escape
     * @return Escaped string safe for CSV
     */
    private static String escapeForCsv(String value) {
        if (value == null) {
            return "";
        }

        // Replace double quotes with two double quotes (CSV standard)
        value = value.replace("\"", "\"\"");

        // Replace newlines with spaces to keep each CSV record on one line
        value = value.replace("\r\n", " ").replace("\n", " ").replace("\r", " ");

        return value;
    }
}