package gudusoft.gsqlparser.dlineage.util;

import gudusoft.gsqlparser.EDbVendor;
import gudusoft.gsqlparser.dlineage.dataflow.model.DataflowRemoveOption;
import gudusoft.gsqlparser.dlineage.dataflow.model.RelationshipType;
import gudusoft.gsqlparser.dlineage.dataflow.model.ResultSetType;
import gudusoft.gsqlparser.dlineage.dataflow.model.xml.*;
import gudusoft.gsqlparser.util.Logger;
import gudusoft.gsqlparser.util.LoggerFactory;
import gudusoft.gsqlparser.util.SQLUtil;

import java.util.*;

/**
 * Builds a simplified copy of a {@link dataflow} model in which the variables and
 * result sets selected by a {@link DataflowRemoveOption} are dropped and the
 * relationships that passed through them are re-wired so that every retained target
 * column points directly at the retained source columns it is fed from.
 *
 * <p>The helper keeps two per-instance lookup caches in plain {@link HashMap}s; both
 * are cleared at the start of every {@link #remove} call.
 */
public class DataflowRemoveHelper {

    private static final Logger logger = LoggerFactory.getLogger(DataflowRemoveHelper.class);

    /** Relationship types in the order {@link #getRelationType(List)} tests for them. */
    private static final String[] RELATION_TYPE_PRIORITY = {"join", "fdr", "frd", "fddi"};

    /** Cache for {@link #isTable}: node id to "is listed in instance.getTables()". */
    private final Map<String, Boolean> isTables = new HashMap<String, Boolean>();

    /** Cache for {@link #isTarget}: node id to "is a retained endpoint". */
    private final Map<String, Boolean> targetTables = new HashMap<String, Boolean>();

    /**
     * Produces the simplified dataflow.
     *
     * <p>Relationships of type {@code call} and {@code er} are copied through unchanged.
     * Every other relationship is kept only if its target's parent is a retained node
     * (see {@link #isTarget}); its sources are then replaced by the retained source
     * columns reachable by walking upstream through the relationships of removed nodes.
     * One cloned relationship is emitted per resulting relationship type.
     *
     * @param instance the dataflow to simplify; it is read but not structurally modified here
     * @param dbVendor vendor used when normalizing column names for comparison
     * @param option   selects which variables / result sets are removed
     * @return a new simplified dataflow, or {@code instance} itself if any exception is
     *         raised while simplifying (the exception is logged)
     */
    public dataflow remove(dataflow instance, EDbVendor dbVendor, DataflowRemoveOption option) {
        try {
            targetTables.clear();
            isTables.clear();
            dataflow simple = new dataflow();
            List<relationship> simpleRelations = new ArrayList<relationship>();
            List<relationship> relations = instance.getRelationships();
            if (relations != null && !relations.isEmpty()) {
                Map<String, Set<relationship>> targetIdRelationMap = indexRelationsByTarget(relations);

                // Ids for generated relationships count upward from 100 x the numeric part
                // of the last relationship's id (text before the first '-', with '_' removed).
                // NOTE(review): presumably chosen to avoid clashing with existing ids - confirm.
                long maxId = Long.parseLong(
                        relations.get(relations.size() - 1).getId().split("\\-")[0].replace("_", "")) * 100;

                for (relationship relationElem : relations) {
                    String type = relationElem.getType();
                    if (RelationshipType.call.name().equals(type)
                            || RelationshipType.er.name().equals(type)) {
                        simpleRelations.add(relationElem);
                        continue;
                    }

                    targetColumn target = relationElem.getTarget();
                    if (target == null) {
                        // A relationship without a target cannot be attached to any retained
                        // node. Skipping it avoids a NullPointerException that would otherwise
                        // abort the whole simplification and return the untouched input.
                        continue;
                    }
                    if (!isTarget(option, instance, target.getParent_id())) {
                        continue;
                    }

                    List<Pair<sourceColumn, List<String>>> relationSources =
                            new ArrayList<Pair<sourceColumn, List<String>>>();
                    findSourceRelations(option, dbVendor, target, instance, targetIdRelationMap,
                            relationElem, relationSources, new String[]{type});
                    if (relationSources.isEmpty()) {
                        continue;
                    }

                    Map<String, List<sourceColumn>> relationSourceMap =
                            groupSourcesByMergedType(relationSources);
                    for (Map.Entry<String, List<sourceColumn>> entry : relationSourceMap.entrySet()) {
                        relationship simpleRelation = (relationship) relationElem.clone();
                        simpleRelation.setSources(entry.getValue());
                        simpleRelation.setType(entry.getKey());
                        simpleRelation.setId(String.valueOf(++maxId));
                        simpleRelations.add(simpleRelation);
                    }
                }
            }

            copyRetainedMetadata(instance, simple);
            if (instance.getVariables() != null) {
                simple.setVariables(filterVariables(option, instance));
            }
            if (instance.getResultsets() != null) {
                simple.setResultsets(filterResultSets(option, instance));
            }
            simple.setRelationships(simpleRelations);
            simple.setOrientation(instance.getOrientation());
            return simple;
        } catch (Exception e) {
            logger.error("Remove dataflow resultSet failed.", e);
        }
        return instance;
    }

    /**
     * Indexes relationships by {@code "<target parent id>.<target id>"}; relationships
     * without a target are left out of the index.
     */
    private Map<String, Set<relationship>> indexRelationsByTarget(List<relationship> relations) {
        Map<String, Set<relationship>> index = new HashMap<String, Set<relationship>>();
        for (relationship relation : relations) {
            if (relation.getTarget() == null) {
                continue;
            }
            String key = relation.getTarget().getParent_id() + "." + relation.getTarget().getId();
            Set<relationship> bucket = index.get(key);
            if (bucket == null) {
                bucket = new HashSet<relationship>();
                index.put(key, bucket);
            }
            bucket.add(relation);
        }
        return index;
    }

    /**
     * Groups the discovered source columns by the single relationship type that results
     * from merging all type paths leading to that column.
     *
     * <p>Columns are first bucketed with a {@link HashMap} keyed by the column object, so
     * which entries count as "the same column" is decided by {@code sourceColumn}'s own
     * {@code equals}/{@code hashCode} (not visible in this file).
     */
    private Map<String, List<sourceColumn>> groupSourcesByMergedType(
            List<Pair<sourceColumn, List<String>>> relationSources) {
        Map<sourceColumn, List<Pair<sourceColumn, List<String>>>> columnMap =
                new HashMap<sourceColumn, List<Pair<sourceColumn, List<String>>>>();
        for (Pair<sourceColumn, List<String>> item : relationSources) {
            List<Pair<sourceColumn, List<String>>> bucket = columnMap.get(item.first);
            if (bucket == null) {
                bucket = new ArrayList<Pair<sourceColumn, List<String>>>();
                columnMap.put(item.first, bucket);
            }
            bucket.add(item);
        }

        Map<String, List<sourceColumn>> relationSourceMap = new HashMap<String, List<sourceColumn>>();
        for (Map.Entry<sourceColumn, List<Pair<sourceColumn, List<String>>>> entry : columnMap.entrySet()) {
            String relationType = mergeRelationType(entry.getValue());
            List<sourceColumn> columns = relationSourceMap.get(relationType);
            if (columns == null) {
                columns = new ArrayList<sourceColumn>();
                relationSourceMap.put(relationType, columns);
            }
            columns.add(entry.getKey());
        }
        return relationSourceMap;
    }

    /** Copies every node collection that is carried over unfiltered from {@code from} to {@code to}. */
    private void copyRetainedMetadata(dataflow from, dataflow to) {
        to.setErrors(from.getErrors());
        to.setPackages(from.getPackages());
        to.setProcedures(from.getProcedures());
        to.setProcesses(from.getProcesses());
        to.setTables(from.getTables());
        to.setViews(from.getViews());
        to.setDatabases(from.getDatabases());
        to.setSchemas(from.getSchemas());
        to.setStages(from.getStages());
        to.setSequences(from.getSequences());
        to.setDatasources(from.getDatasources());
        to.setStreams(from.getStreams());
        to.setPaths(from.getPaths());
    }

    /** Returns the variables of {@code instance} that the option does not remove. */
    private List<table> filterVariables(DataflowRemoveOption option, dataflow instance) {
        List<table> kept = new ArrayList<table>();
        for (int i = 0; i < instance.getVariables().size(); i++) {
            table variable = instance.getVariables().get(i);
            if (!isRemoveVariable(option, variable)) {
                kept.add(variable);
            }
        }
        return kept;
    }

    /** Returns the result sets of {@code instance} that the option does not remove. */
    private List<table> filterResultSets(DataflowRemoveOption option, dataflow instance) {
        List<table> kept = new ArrayList<table>();
        for (int i = 0; i < instance.getResultsets().size(); i++) {
            table resultSet = instance.getResultsets().get(i);
            if (!isRemoveResultSet(option, resultSet)) {
                kept.add(resultSet);
            }
        }
        return kept;
    }

    /**
     * Decides whether a variable node is removed: with {@code isRemoveVariable()} every
     * node of type {@code "variable"} goes; otherwise, with {@code isRemoveCursor()},
     * only those whose sub type is {@code "cursor"}.
     */
    private boolean isRemoveVariable(DataflowRemoveOption option, table variable) {
        if (option.isRemoveVariable()) {
            return "variable".equals(variable.getType());
        }
        if (option.isRemoveCursor()) {
            return "variable".equals(variable.getType()) && "cursor".equals(variable.getSubType());
        }
        return false;
    }

    /**
     * Decides whether a result set node is removed: either its type is listed in
     * {@code option.getRemoveResultSetTypes()}, or {@code ResultSetType.function} is
     * listed and the node's sub type is {@code "function"}.
     */
    private boolean isRemoveResultSet(DataflowRemoveOption option, table resultSet) {
        ResultSetType resultSetType = ResultSetType.of(resultSet.getType());
        if (option.getRemoveResultSetTypes().contains(resultSetType)) {
            return true;
        }
        return option.getRemoveResultSetTypes().contains(ResultSetType.function)
                && "function".equals(resultSet.getSubType());
    }

    /**
     * Entry point of the upstream walk: collects into {@code relationSources} the retained
     * source columns that feed {@code target}, starting from {@code targetRelation}.
     */
    private void findSourceRelations(DataflowRemoveOption option, EDbVendor dbVendor, targetColumn target,
            dataflow instance, Map<String, Set<relationship>> sourceIdRelationMap,
            relationship targetRelation, List<Pair<sourceColumn, List<String>>> relationSources,
            String[] pathTypes) {
        findStarSourceRelations(option, dbVendor, target, instance, null, sourceIdRelationMap,
                targetRelation, relationSources, pathTypes,
                new HashSet<String>(), new LinkedHashSet<transform>(), new LinkedHashSet<candidateTable>());
    }

    /**
     * Recursive step of the upstream walk.
     *
     * <p>For each source of {@code targetRelation}: if its parent is a retained node, a
     * copy of the source (decorated with the transforms and candidate tables gathered on
     * the way) is recorded together with the relationship types seen along the path;
     * otherwise the walk continues through every relationship whose target is that source.
     *
     * @param starRelationTarget when non-null, the walk passed through a {@code "*"} column
     *                           and only sources that are {@code "*"} or whose normalized
     *                           column name equals this target's are followed
     * @param sourceIdRelationMap relationships indexed by {@code "<target parent id>.<target id>"}
     * @param pathTypes          relationship types on the path so far, most upstream first
     * @param paths              keys of removed source columns already expanded; shared across
     *                           the whole walk so each is expanded once
     * @param transforms         transforms gathered so far (insertion ordered)
     * @param candidateTables    candidate parent tables gathered so far
     */
    private void findStarSourceRelations(DataflowRemoveOption option, EDbVendor dbVendor, targetColumn target,
            dataflow instance, targetColumn starRelationTarget,
            Map<String, Set<relationship>> sourceIdRelationMap,
            relationship targetRelation, List<Pair<sourceColumn, List<String>>> relationSources,
            String[] pathTypes,
            Set<String> paths, Set<transform> transforms, Set<candidateTable> candidateTables) {
        if (targetRelation == null || targetRelation.getSources() == null) {
            return;
        }
        for (int i = 0; i < targetRelation.getSources().size(); i++) {
            sourceColumn source = targetRelation.getSources().get(i);
            boolean starSource = "*".equals(source.getColumn());

            if (starRelationTarget != null && !starSource
                    && !DlineageUtil.getIdentifierNormalColumnName(starRelationTarget.getColumn(), dbVendor)
                            .equals(DlineageUtil.getIdentifierNormalColumnName(source.getColumn(), dbVendor))) {
                continue;
            }

            String sourceColumnId = source.getId();
            String sourceParentId = source.getParent_id();
            if (sourceParentId == null || sourceColumnId == null) {
                continue;
            }

            if (isTarget(option, instance, sourceParentId)) {
                // Transforms were gathered walking upstream; they are attached in reverse.
                List<transform> orderedTransforms = new ArrayList<transform>(transforms);
                Collections.reverse(orderedTransforms);
                List<candidateTable> orderedCandidates = new ArrayList<candidateTable>(candidateTables);

                sourceColumn sourceColumnCopy = DlineageUtil.copySourceColumn(source);
                for (transform t : orderedTransforms) {
                    sourceColumnCopy.addTransform(t);
                }
                for (candidateTable t : orderedCandidates) {
                    sourceColumnCopy.addCandidateParent(t);
                }

                if (Boolean.TRUE.equals(target.isStruct()) && Boolean.TRUE.equals(source.isStruct())) {
                    // For struct columns only the last name segment has to match.
                    List<String> targetColumns = SQLUtil.parseNames(target.getColumn());
                    List<String> sourceColumns = SQLUtil.parseNames(source.getColumn());
                    if (!DlineageUtil
                            .getIdentifierNormalColumnName(targetColumns.get(targetColumns.size() - 1), dbVendor)
                            .equals(DlineageUtil.getIdentifierNormalColumnName(
                                    sourceColumns.get(sourceColumns.size() - 1), dbVendor))) {
                        continue;
                    }
                }
                relationSources.add(
                        new Pair<sourceColumn, List<String>>(sourceColumnCopy, Arrays.asList(pathTypes)));
                continue;
            }

            String sourceKey = sourceParentId + "." + sourceColumnId;
            Set<relationship> sourceRelations = sourceIdRelationMap.get(sourceKey);
            if (sourceRelations == null) {
                continue;
            }
            if (!paths.add(sourceKey)) {
                // Already expanded on this walk.
                continue;
            }
            if (source.getTransforms() != null) {
                transforms.addAll(source.getTransforms());
            }
            if (source.getCandidateParents() != null) {
                candidateTables.addAll(source.getCandidateParents());
            }

            for (relationship relation : sourceRelations) {
                LinkedHashSet<candidateTable> candidateTablesCopy =
                        new LinkedHashSet<candidateTable>(candidateTables);
                String[] types = new String[pathTypes.length + 1];
                types[0] = relation.getType();
                System.arraycopy(pathTypes, 0, types, 1, pathTypes.length);

                if (!starSource) {
                    LinkedHashSet<transform> transformsCopy = new LinkedHashSet<transform>(transforms);
                    findStarSourceRelations(option, dbVendor, target, instance, null, sourceIdRelationMap,
                            relation, relationSources, types, paths, transformsCopy, candidateTablesCopy);
                } else {
                    // NOTE(review): this branch hands down the shared 'transforms' set rather
                    // than a copy, unlike the non-star branch. Kept as found; confirm intent.
                    findStarSourceRelations(option, dbVendor, target, instance,
                            starRelationTarget == null ? targetRelation.getTarget() : starRelationTarget,
                            sourceIdRelationMap, relation, relationSources, types, paths,
                            transforms, candidateTablesCopy);
                }
            }
        }
    }

    /**
     * Tells whether the node with the given id is retained in the simplified model: any
     * table, view, stage, sequence, data source, database, schema, stream or path, or a
     * variable / result set that the option does not remove. Results are cached in
     * {@link #targetTables}.
     */
    private boolean isTarget(DataflowRemoveOption option, dataflow instance, String targetParentId) {
        Boolean cached = targetTables.get(targetParentId);
        if (cached != null) {
            return cached;
        }
        boolean result = isTable(instance, targetParentId)
                || isView(instance, targetParentId)
                || isStage(instance, targetParentId)
                || isSequence(instance, targetParentId)
                || isDataSource(instance, targetParentId)
                || isDatabase(instance, targetParentId)
                || isSchema(instance, targetParentId)
                || isStream(instance, targetParentId)
                || isFile(instance, targetParentId)
                || isVariable(option, instance, targetParentId)
                || isTargetResultSet(option, instance, targetParentId);
        targetTables.put(targetParentId, result);
        return result;
    }

    /**
     * Tells whether the id belongs to one of {@code instance.getTables()} (case-insensitive
     * match). While scanning, every table id visited is recorded in {@link #isTables} so a
     * later lookup with that exact id is answered from the cache.
     */
    private boolean isTable(dataflow instance, String targetParentId) {
        if (isTables.containsKey(targetParentId)) {
            return isTables.get(targetParentId);
        }
        if (instance.getTables() != null) {
            for (int i = 0; i < instance.getTables().size(); i++) {
                table candidate = instance.getTables().get(i);
                isTables.put(candidate.getId(), true);
                if (candidate.getId().equalsIgnoreCase(targetParentId)) {
                    return true;
                }
            }
        }
        isTables.put(targetParentId, false);
        return false;
    }

    private boolean isView(dataflow instance, String targetParent) {
        return containsId(instance.getViews(), targetParent);
    }

    private boolean isStage(dataflow instance, String targetParent) {
        return containsId(instance.getStages(), targetParent);
    }

    private boolean isSequence(dataflow instance, String targetParent) {
        return containsId(instance.getSequences(), targetParent);
    }

    private boolean isDataSource(dataflow instance, String targetParent) {
        return containsId(instance.getDatasources(), targetParent);
    }

    private boolean isDatabase(dataflow instance, String targetParent) {
        return containsId(instance.getDatabases(), targetParent);
    }

    private boolean isSchema(dataflow instance, String targetParent) {
        return containsId(instance.getSchemas(), targetParent);
    }

    private boolean isStream(dataflow instance, String targetParent) {
        return containsId(instance.getStreams(), targetParent);
    }

    private boolean isFile(dataflow instance, String targetParent) {
        return containsId(instance.getPaths(), targetParent);
    }

    /** True when the id names a variable that exists and is not removed by the option. */
    private boolean isVariable(DataflowRemoveOption option, dataflow instance, String targetParent) {
        table variable = findById(instance.getVariables(), targetParent);
        return variable != null && !isRemoveVariable(option, variable);
    }

    /** True when the id names a result set that exists and is not removed by the option. */
    private boolean isTargetResultSet(DataflowRemoveOption option, dataflow instance, String targetParent) {
        table resultSet = findById(instance.getResultsets(), targetParent);
        return resultSet != null && !isRemoveResultSet(option, resultSet);
    }

    /** True when {@code items} is non-null and holds a node whose id matches, ignoring case. */
    private static boolean containsId(List<? extends table> items, String id) {
        return findById(items, id) != null;
    }

    /**
     * Returns the first node in {@code items} whose id equals {@code id} ignoring case,
     * or {@code null} when {@code items} is null or holds no such node.
     */
    private static table findById(List<? extends table> items, String id) {
        if (items == null) {
            return null;
        }
        for (int i = 0; i < items.size(); i++) {
            table item = items.get(i);
            if (item.getId().equalsIgnoreCase(id)) {
                return item;
            }
        }
        return null;
    }

    /**
     * Reduces several type paths leading to the same source column to one relationship
     * type: each path is collapsed with {@link #getRelationType(List)}, then the type with
     * the lowest {@code ordinal()} wins, starting from {@code join}.
     *
     * <p>NOTE(review): the outcome depends on the declaration order of
     * {@code RelationshipType}, which is not visible in this file.
     */
    private String mergeRelationType(List<Pair<sourceColumn, List<String>>> typePaths) {
        RelationshipType relationType = RelationshipType.join;
        for (Pair<sourceColumn, List<String>> typePath : typePaths) {
            RelationshipType type = RelationshipType.valueOf(getRelationType(typePath.second));
            if (type.ordinal() < relationType.ordinal()) {
                relationType = type;
            }
        }
        return relationType.name();
    }

    /**
     * Collapses one path of relationship types to a single type: the first of
     * {@code join}, {@code fdr}, {@code frd}, {@code fddi} that occurs anywhere on the
     * path, or {@code fdd} when none does.
     */
    private String getRelationType(List<String> typePaths) {
        for (String candidate : RELATION_TYPE_PRIORITY) {
            if (typePaths.contains(candidate)) {
                return candidate;
            }
        }
        return "fdd";
    }

}