package gudusoft.gsqlparser.dlineage.util;

import java.util.*;

import gudusoft.gsqlparser.EDbVendor;
import gudusoft.gsqlparser.dlineage.dataflow.model.RelationshipType;
import gudusoft.gsqlparser.dlineage.dataflow.model.xml.*;
import gudusoft.gsqlparser.util.Logger;
import gudusoft.gsqlparser.util.LoggerFactory;
import gudusoft.gsqlparser.util.SQLUtil;

/**
 * Produces a copy of a {@code dataflow} in which function result sets are dropped and every
 * remaining relationship points from its target straight to the nearest upstream columns whose
 * parent is still present in the model (see {@link #isTarget(dataflow, String)}).
 *
 * <p>The per-call lookup caches are kept in instance fields and are cleared at the start of
 * {@link #removeFunction(dataflow, EDbVendor)}; a single instance therefore must not be used
 * for overlapping calls.
 */
public class RemoveDataflowFunction {

    private static final Logger logger = LoggerFactory.getLogger(RemoveDataflowFunction.class);

    /** Cache for {@link #isTable(dataflow, String)}, keyed by table id. */
    private final Map<String, Boolean> isTables = new HashMap<String, Boolean>();

    /** Cache for {@link #isTarget(dataflow, String)}, keyed by parent id. */
    private final Map<String, Boolean> targetTables = new HashMap<String, Boolean>();

    /**
     * Builds a simplified dataflow without function result sets.
     *
     * <p>Relationships of type {@code call} and {@code er} are carried over unchanged. Every other
     * relationship whose target parent is retained is replaced by one new relationship per merged
     * relation type, whose sources are the retained columns found by walking upstream through the
     * removed nodes. Relationships without a target are skipped.
     *
     * @param instance the dataflow to simplify
     * @param dbVendor vendor used when normalizing column names for comparison
     * @return the simplified dataflow; {@code instance} itself when it has no result sets or when
     *         simplification fails (the failure is logged)
     */
    public dataflow removeFunction(dataflow instance, EDbVendor dbVendor) {
        if (instance.getResultsets() == null || instance.getResultsets().isEmpty()) {
            return instance;
        }
        try {
            targetTables.clear();
            isTables.clear();

            dataflow simple = new dataflow();
            List<relationship> simpleRelations = new ArrayList<relationship>();
            List<relationship> relations = instance.getRelationships();
            if (relations != null && !relations.isEmpty()) {
                Map<String, Set<relationship>> targetIdRelationMap = indexRelationsByTarget(relations);

                // New relationship ids are allocated above (last id's leading token) * 100.
                // NOTE(review): assumes that token is numeric once '_' is stripped; otherwise the
                // NumberFormatException is caught below and the original instance is returned.
                long maxId = Long.parseLong(
                        relations.get(relations.size() - 1).getId().split("\\-")[0].replace("_", "")) * 100;

                for (relationship relationElem : relations) {
                    String relationKind = relationElem.getType();
                    if (RelationshipType.call.name().equals(relationKind)
                            || RelationshipType.er.name().equals(relationKind)) {
                        simpleRelations.add(relationElem);
                        continue;
                    }

                    targetColumn target = relationElem.getTarget();
                    if (target == null) {
                        // The index above already tolerates target-less relations; skip them here
                        // too instead of failing the whole simplification with an NPE.
                        continue;
                    }
                    if (!isTarget(instance, target.getParent_id())) {
                        continue;
                    }

                    List<Pair<sourceColumn, List<String>>> relationSources =
                            new ArrayList<Pair<sourceColumn, List<String>>>();
                    findSourceRelations(dbVendor, target, instance, targetIdRelationMap, relationElem,
                            relationSources, new String[]{relationElem.getType()});
                    if (relationSources.isEmpty()) {
                        continue;
                    }

                    Map<String, List<sourceColumn>> sourcesByType = groupSourcesByMergedType(relationSources);
                    for (Map.Entry<String, List<sourceColumn>> entry : sourcesByType.entrySet()) {
                        relationship simpleRelation = (relationship) relationElem.clone();
                        simpleRelation.setSources(entry.getValue());
                        simpleRelation.setType(entry.getKey());
                        simpleRelation.setId(String.valueOf(++maxId));
                        simpleRelations.add(simpleRelation);
                    }
                }
            }

            copyNonRelationData(instance, simple);
            if (instance.getResultsets() != null) {
                List<table> resultSets = new ArrayList<table>();
                for (table resultSet : instance.getResultsets()) {
                    if (!isFunction(resultSet)) {
                        resultSets.add(resultSet);
                    }
                }
                simple.setResultsets(resultSets);
            }
            simple.setRelationships(simpleRelations);
            simple.setOrientation(instance.getOrientation());
            return simple;
        } catch (Exception e) {
            // Best effort: on any failure the caller gets the unmodified input back.
            logger.error("Remove dataflow function failed.", e);
        }
        return instance;
    }

    /** Indexes relationships by {@code "<target parent id>.<target id>"}; target-less ones are left out. */
    private Map<String, Set<relationship>> indexRelationsByTarget(List<relationship> relations) {
        Map<String, Set<relationship>> index = new HashMap<String, Set<relationship>>();
        for (relationship relation : relations) {
            if (relation.getTarget() == null) {
                continue;
            }
            String key = relation.getTarget().getParent_id() + "." + relation.getTarget().getId();
            Set<relationship> bucket = index.get(key);
            if (bucket == null) {
                bucket = new HashSet<relationship>();
                index.put(key, bucket);
            }
            bucket.add(relation);
        }
        return index;
    }

    /** Groups the discovered source columns by the relation type merged over all paths reaching them. */
    private Map<String, List<sourceColumn>> groupSourcesByMergedType(
            List<Pair<sourceColumn, List<String>>> relationSources) {
        Map<sourceColumn, List<Pair<sourceColumn, List<String>>>> pathsByColumn =
                new HashMap<sourceColumn, List<Pair<sourceColumn, List<String>>>>();
        for (Pair<sourceColumn, List<String>> item : relationSources) {
            List<Pair<sourceColumn, List<String>>> columnPaths = pathsByColumn.get(item.first);
            if (columnPaths == null) {
                columnPaths = new ArrayList<Pair<sourceColumn, List<String>>>();
                pathsByColumn.put(item.first, columnPaths);
            }
            columnPaths.add(item);
        }

        Map<String, List<sourceColumn>> sourcesByType = new HashMap<String, List<sourceColumn>>();
        for (Map.Entry<sourceColumn, List<Pair<sourceColumn, List<String>>>> entry : pathsByColumn.entrySet()) {
            String relationType = mergeRelationType(entry.getValue());
            List<sourceColumn> columns = sourcesByType.get(relationType);
            if (columns == null) {
                columns = new ArrayList<sourceColumn>();
                sourcesByType.put(relationType, columns);
            }
            columns.add(entry.getKey());
        }
        return sourcesByType;
    }

    /** Copies every container list except result sets and relationships from {@code from} to {@code to}. */
    private void copyNonRelationData(dataflow from, dataflow to) {
        to.setErrors(from.getErrors());
        to.setPackages(from.getPackages());
        to.setProcedures(from.getProcedures());
        to.setProcesses(from.getProcesses());
        to.setTables(from.getTables());
        to.setViews(from.getViews());
        to.setDatabases(from.getDatabases());
        to.setSchemas(from.getSchemas());
        to.setStages(from.getStages());
        to.setSequences(from.getSequences());
        to.setDatasources(from.getDatasources());
        to.setStreams(from.getStreams());
        to.setPaths(from.getPaths());
        to.setVariables(from.getVariables());
    }

    /**
     * Tells whether a node is a function: either its type is {@code "function"}, or it is a
     * {@code "resultset"} whose sub type is {@code "function"}.
     */
    private boolean isFunction(table resultSet) {
        return "function".equals(resultSet.getType())
                || ("resultset".equals(resultSet.getType()) && "function".equals(resultSet.getSubType()));
    }

    /**
     * Starts an upstream walk from {@code targetRelation} with empty visited/transform/candidate
     * state and no star target.
     *
     * @param sourceIdRelationMap relationships indexed by {@code "<target parent id>.<target id>"}
     * @param relationSources     output list receiving (source column, relation types along the path)
     * @param pathTypes           relation types collected so far, nearest-to-source first
     */
    private void findSourceRelations(EDbVendor dbVendor, targetColumn target, dataflow instance,
            Map<String, Set<relationship>> sourceIdRelationMap, relationship targetRelation,
            List<Pair<sourceColumn, List<String>>> relationSources, String[] pathTypes) {
        findStarSourceRelations(dbVendor, target, instance, null, sourceIdRelationMap, targetRelation,
                relationSources, pathTypes, new HashSet<String>(), new LinkedHashSet<transform>(),
                new LinkedHashSet<candidateTable>());
    }

    /**
     * Recursively walks the sources of {@code targetRelation}. A source whose parent is retained is
     * recorded in {@code relationSources}; any other source is followed through the relationships
     * that have it as their target.
     *
     * @param starRelationTarget when non-null, only sources named {@code *} or whose normalized
     *                           column name equals this target's are considered
     * @param paths              keys of sources already expanded in this walk; prevents revisiting
     * @param transforms         transforms gathered from the expanded sources so far
     * @param candidateTables    candidate parents gathered from the expanded sources so far
     */
    private void findStarSourceRelations(EDbVendor dbVendor, targetColumn target, dataflow instance,
            targetColumn starRelationTarget, Map<String, Set<relationship>> sourceIdRelationMap,
            relationship targetRelation, List<Pair<sourceColumn, List<String>>> relationSources,
            String[] pathTypes, Set<String> paths, Set<transform> transforms,
            Set<candidateTable> candidateTables) {
        if (targetRelation == null || targetRelation.getSources() == null) {
            return;
        }
        for (sourceColumn source : targetRelation.getSources()) {
            boolean sourceIsStar = "*".equals(source.getColumn());

            if (starRelationTarget != null && !sourceIsStar
                    && !DlineageUtil.getIdentifierNormalColumnName(starRelationTarget.getColumn(), dbVendor)
                            .equals(DlineageUtil.getIdentifierNormalColumnName(source.getColumn(), dbVendor))) {
                continue;
            }

            String sourceColumnId = source.getId();
            String sourceParentId = source.getParent_id();
            if (sourceParentId == null || sourceColumnId == null) {
                continue;
            }

            if (isTarget(instance, sourceParentId)) {
                // Transforms are attached in reverse order of collection.
                List<transform> reversedTransforms = new ArrayList<transform>(transforms);
                Collections.reverse(reversedTransforms);

                sourceColumn sourceColumnCopy = DlineageUtil.copySourceColumn(source);
                for (transform collected : reversedTransforms) {
                    sourceColumnCopy.addTransform(collected);
                }
                for (candidateTable candidate : candidateTables) {
                    sourceColumnCopy.addCandidateParent(candidate);
                }

                if (isStructLeafMismatch(target, source, dbVendor)) {
                    continue;
                }
                relationSources.add(
                        new Pair<sourceColumn, List<String>>(sourceColumnCopy, Arrays.asList(pathTypes)));
                continue;
            }

            String sourceKey = sourceParentId + "." + sourceColumnId;
            Set<relationship> sourceRelations = sourceIdRelationMap.get(sourceKey);
            if (sourceRelations == null || paths.contains(sourceKey)) {
                continue;
            }
            paths.add(sourceKey);
            if (source.getTransforms() != null) {
                transforms.addAll(source.getTransforms());
            }
            if (source.getCandidateParents() != null) {
                candidateTables.addAll(source.getCandidateParents());
            }

            for (relationship relation : sourceRelations) {
                String[] types = new String[pathTypes.length + 1];
                types[0] = relation.getType();
                System.arraycopy(pathTypes, 0, types, 1, pathTypes.length);

                Set<candidateTable> candidateTablesCopy = new LinkedHashSet<candidateTable>(candidateTables);
                if (!sourceIsStar) {
                    findStarSourceRelations(dbVendor, target, instance, null, sourceIdRelationMap, relation,
                            relationSources, types, paths, new LinkedHashSet<transform>(transforms),
                            candidateTablesCopy);
                } else {
                    // NOTE(review): unlike the branch above, the star branch hands down the shared
                    // transforms set rather than a copy. Kept as in the original; confirm intent.
                    findStarSourceRelations(dbVendor, target, instance,
                            starRelationTarget == null ? targetRelation.getTarget() : starRelationTarget,
                            sourceIdRelationMap, relation, relationSources, types, paths, transforms,
                            candidateTablesCopy);
                }
            }
        }
    }

    /**
     * Returns {@code true} when both columns are flagged as struct and the last name segment of the
     * target differs from the last name segment of the source after normalization.
     */
    private boolean isStructLeafMismatch(targetColumn target, sourceColumn source, EDbVendor dbVendor) {
        if (!Boolean.TRUE.equals(target.isStruct()) || !Boolean.TRUE.equals(source.isStruct())) {
            return false;
        }
        List<String> targetColumns = SQLUtil.parseNames(target.getColumn());
        List<String> sourceColumns = SQLUtil.parseNames(source.getColumn());
        String targetLeaf = DlineageUtil.getIdentifierNormalColumnName(
                targetColumns.get(targetColumns.size() - 1), dbVendor);
        String sourceLeaf = DlineageUtil.getIdentifierNormalColumnName(
                sourceColumns.get(sourceColumns.size() - 1), dbVendor);
        return !targetLeaf.equals(sourceLeaf);
    }

    /**
     * Tells whether the node with the given id is retained in the simplified dataflow: a table,
     * view, stage, sequence, data source, database, schema, stream, path, variable, or a result
     * set that is not a function. The answer is cached per id for the current call.
     */
    private boolean isTarget(dataflow instance, String targetParentId) {
        Boolean cached = targetTables.get(targetParentId);
        if (cached != null) {
            return cached.booleanValue();
        }
        boolean retained = isTable(instance, targetParentId)
                || containsId(instance.getViews(), targetParentId)
                || containsId(instance.getStages(), targetParentId)
                || containsId(instance.getSequences(), targetParentId)
                || containsId(instance.getDatasources(), targetParentId)
                || containsId(instance.getDatabases(), targetParentId)
                || containsId(instance.getSchemas(), targetParentId)
                || containsId(instance.getStreams(), targetParentId)
                || containsId(instance.getPaths(), targetParentId)
                || containsId(instance.getVariables(), targetParentId)
                || isTargetResultSet(instance, targetParentId);
        targetTables.put(targetParentId, Boolean.valueOf(retained));
        return retained;
    }

    /** Returns {@code true} when {@code candidates} is non-null and holds an entry whose id matches, ignoring case. */
    private static boolean containsId(List<? extends table> candidates, String id) {
        if (candidates == null) {
            return false;
        }
        for (table candidate : candidates) {
            if (candidate.getId().equalsIgnoreCase(id)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the id belongs to one of the dataflow's tables. Ids seen while scanning are
     * cached as tables, and a miss is cached as well.
     */
    private boolean isTable(dataflow instance, String targetParentId) {
        Boolean cached = isTables.get(targetParentId);
        if (cached != null) {
            return cached.booleanValue();
        }
        if (instance.getTables() != null) {
            for (table candidate : instance.getTables()) {
                isTables.put(candidate.getId(), Boolean.TRUE);
                if (candidate.getId().equalsIgnoreCase(targetParentId)) {
                    return true;
                }
            }
        }
        isTables.put(targetParentId, Boolean.FALSE);
        return false;
    }

    /** Returns whether the first result set with a matching id (ignoring case) is not a function. */
    private boolean isTargetResultSet(dataflow instance, String targetParent) {
        if (instance.getResultsets() != null) {
            for (table resultSet : instance.getResultsets()) {
                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
                    return !isFunction(resultSet);
                }
            }
        }
        return false;
    }

    /**
     * Reduces each path to a single relation type and returns the one with the lowest
     * {@link RelationshipType} ordinal, starting from {@code join}.
     * NOTE(review): the outcome depends on the declaration order of {@code RelationshipType}.
     */
    private String mergeRelationType(List<Pair<sourceColumn, List<String>>> typePaths) {
        RelationshipType relationType = RelationshipType.join;
        for (Pair<sourceColumn, List<String>> typePath : typePaths) {
            RelationshipType type = RelationshipType.valueOf(getRelationType(typePath.second));
            if (type.ordinal() < relationType.ordinal()) {
                relationType = type;
            }
        }
        return relationType.name();
    }

    /**
     * Picks the relation type for one path: the first of {@code join}, {@code fdr}, {@code frd},
     * {@code fddi} that occurs anywhere in the path, otherwise {@code fdd}.
     */
    private String getRelationType(List<String> typePaths) {
        if (typePaths.contains("join")) {
            return "join";
        }
        if (typePaths.contains("fdr")) {
            return "fdr";
        }
        if (typePaths.contains("frd")) {
            return "frd";
        }
        if (typePaths.contains("fddi")) {
            return "fddi";
        }
        return "fdd";
    }

}