package gudusoft.gsqlparser.dlineage.util;

import java.util.*;

import gudusoft.gsqlparser.EDbVendor;
import gudusoft.gsqlparser.dlineage.dataflow.model.RelationshipType;
import gudusoft.gsqlparser.dlineage.dataflow.model.xml.*;
import gudusoft.gsqlparser.util.Logger;
import gudusoft.gsqlparser.util.LoggerFactory;
import gudusoft.gsqlparser.util.SQLUtil;

/**
 * Produces a simplified copy of a {@code dataflow} in which function result sets are dropped
 * and every relationship whose target is a "real" object (table, view, stage, sequence,
 * data source, database, schema, stream, path, variable or non-function result set) is
 * re-expressed with sources that are themselves such objects.
 *
 * <p>The instance keeps lookup caches in plain {@link HashMap} fields that are cleared at the
 * start of each {@link #removeFunction(dataflow, EDbVendor)} call, so one instance should not
 * be used by several threads at the same time.
 */
public class RemoveDataflowFunction {

    private static final Logger logger = LoggerFactory.getLogger(RemoveDataflowFunction.class);

    /** Cache used by {@link #isTable}: object id to "is a table". */
    private Map<String, Boolean> isTables = new HashMap<String, Boolean>();

    /** Cache used by {@link #isTarget}: parent id to "is an end point of the simplified flow". */
    private Map<String, Boolean> targetTables = new HashMap<String, Boolean>();

    /**
     * Builds the simplified dataflow.
     *
     * <p>Relationships of type {@code call} and {@code er} are not carried over, and
     * relationships without a target are skipped. For every other relationship whose target
     * parent is an end point (see {@link #isTarget}), the sources are traced back through
     * intermediate relationships until end-point sources are reached; one new relationship is
     * emitted per merged relation type.
     *
     * <p>The object lists (tables, views, ...) of the returned dataflow are the same list
     * instances as in {@code instance}; only result sets and relationships are rebuilt.
     *
     * @param instance the dataflow to simplify
     * @param dbVendor database vendor used when normalizing column names for comparison
     * @return the simplified dataflow; {@code instance} itself when it has no result sets or
     *         when any exception occurs during simplification (the exception is logged)
     */
    public dataflow removeFunction(dataflow instance, EDbVendor dbVendor) {
        if (instance.getResultsets() == null || instance.getResultsets().isEmpty()) {
            return instance;
        }
        try {
            targetTables.clear();
            isTables.clear();
            dataflow simple = new dataflow();
            List<relationship> simpleRelations = new ArrayList<relationship>();
            List<relationship> relations = instance.getRelationships();
            if (relations != null && !relations.isEmpty()) {
                // Index relationships by "<target parent id>.<target id>" so that the
                // relationships feeding a given column can be looked up while tracing back.
                Map<String, Set<relationship>> targetIdRelationMap = new HashMap<String, Set<relationship>>();
                for (relationship relation : relations) {
                    targetColumn relationTarget = relation.getTarget();
                    if (relationTarget == null) {
                        continue;
                    }
                    String key = relationTarget.getParent_id() + "." + relationTarget.getId();
                    Set<relationship> bucket = targetIdRelationMap.get(key);
                    if (bucket == null) {
                        bucket = new HashSet<relationship>();
                        targetIdRelationMap.put(key, bucket);
                    }
                    bucket.add(relation);
                }

                // New ids start above (numeric prefix of the last relationship id) * 100:
                // the prefix is the text before the first '-', with underscores removed.
                long maxId = Long
                        .parseLong(relations.get(relations.size() - 1).getId().split("\\-")[0].replace("_", "")) * 100;

                for (relationship relationElem : relations) {
                    String elemType = relationElem.getType();
                    if (RelationshipType.call.name().equals(elemType)
                            || RelationshipType.er.name().equals(elemType)) {
                        continue;
                    }
                    targetColumn target = relationElem.getTarget();
                    if (target == null) {
                        // Same guard as the indexing loop above; without it a single
                        // target-less relationship aborts the whole simplification.
                        continue;
                    }
                    if (!isTarget(instance, target.getParent_id())) {
                        continue;
                    }
                    List<Pair<sourceColumn, List<String>>> relationSources = new ArrayList<Pair<sourceColumn, List<String>>>();
                    findSourceRelations(dbVendor, target, instance, targetIdRelationMap, relationElem,
                            relationSources, new String[]{elemType});
                    if (relationSources.isEmpty()) {
                        continue;
                    }
                    Map<String, List<sourceColumn>> sourcesByType = groupSourcesByRelationType(relationSources);
                    for (Map.Entry<String, List<sourceColumn>> entry : sourcesByType.entrySet()) {
                        relationship simpleRelation = (relationship) relationElem.clone();
                        simpleRelation.setSources(entry.getValue());
                        simpleRelation.setType(entry.getKey());
                        simpleRelation.setId(String.valueOf(++maxId));
                        simpleRelations.add(simpleRelation);
                    }
                }
            }
            simple.setErrors(instance.getErrors());
            simple.setPackages(instance.getPackages());
            simple.setProcedures(instance.getProcedures());
            simple.setProcesses(instance.getProcesses());
            simple.setTables(instance.getTables());
            simple.setViews(instance.getViews());
            simple.setDatabases(instance.getDatabases());
            simple.setSchemas(instance.getSchemas());
            simple.setStages(instance.getStages());
            simple.setSequences(instance.getSequences());
            simple.setDatasources(instance.getDatasources());
            simple.setStreams(instance.getStreams());
            simple.setPaths(instance.getPaths());
            simple.setVariables(instance.getVariables());
            if (instance.getResultsets() != null) {
                List<table> resultSets = new ArrayList<table>();
                for (table resultSet : instance.getResultsets()) {
                    if (!isFunction(resultSet)) {
                        resultSets.add(resultSet);
                    }
                }
                simple.setResultsets(resultSets);
            }
            simple.setRelationships(simpleRelations);
            simple.setOrientation(instance.getOrientation());
            return simple;
        } catch (Exception e) {
            // Best effort: on any failure the caller gets the untouched input back.
            logger.error("Remove dataflow function failed.", e);
        }
        return instance;
    }

    /**
     * Groups traced sources first by source column and then by the relation type obtained
     * from merging all type paths recorded for that column.
     *
     * @param relationSources pairs of (source column, relation types along the traced path)
     * @return merged relation type name mapped to the source columns that have that type
     */
    private Map<String, List<sourceColumn>> groupSourcesByRelationType(
            List<Pair<sourceColumn, List<String>>> relationSources) {
        Map<sourceColumn, List<Pair<sourceColumn, List<String>>>> pathsByColumn =
                new HashMap<sourceColumn, List<Pair<sourceColumn, List<String>>>>();
        for (Pair<sourceColumn, List<String>> item : relationSources) {
            List<Pair<sourceColumn, List<String>>> columnPaths = pathsByColumn.get(item.first);
            if (columnPaths == null) {
                columnPaths = new ArrayList<Pair<sourceColumn, List<String>>>();
                pathsByColumn.put(item.first, columnPaths);
            }
            columnPaths.add(item);
        }

        Map<String, List<sourceColumn>> columnsByType = new HashMap<String, List<sourceColumn>>();
        for (Map.Entry<sourceColumn, List<Pair<sourceColumn, List<String>>>> entry : pathsByColumn.entrySet()) {
            String relationType = mergeRelationType(entry.getValue());
            List<sourceColumn> columns = columnsByType.get(relationType);
            if (columns == null) {
                columns = new ArrayList<sourceColumn>();
                columnsByType.put(relationType, columns);
            }
            columns.add(entry.getKey());
        }
        return columnsByType;
    }

    /**
     * Tells whether a result-set entry represents a function: either its type is
     * {@code "function"}, or its type is {@code "resultset"} with sub type {@code "function"}.
     */
    private boolean isFunction(table resultSet) {
        if ("function".equals(resultSet.getType())) {
            return true;
        }
        return "resultset".equals(resultSet.getType()) && "function".equals(resultSet.getSubType());
    }

    /**
     * Starts a source trace for {@code targetRelation} with fresh bookkeeping (no star target,
     * empty visited set, empty transform and candidate-table sets).
     */
    private void findSourceRelations(EDbVendor dbVendor, targetColumn target, dataflow instance,
            Map<String, Set<relationship>> sourceIdRelationMap, relationship targetRelation,
            List<Pair<sourceColumn, List<String>>> relationSources, String[] pathTypes) {
        findStarSourceRelations(dbVendor, target, instance, null, sourceIdRelationMap, targetRelation,
                relationSources, pathTypes, new HashSet<String>(), new LinkedHashSet<transform>(),
                new LinkedHashSet<candidateTable>());
    }

    /**
     * Recursively walks the sources of {@code targetRelation}. A source whose parent is an
     * end point (see {@link #isTarget}) is copied, decorated with the transforms and candidate
     * tables collected so far, and appended to {@code relationSources}; any other source is
     * followed through the relationships that target it.
     *
     * @param dbVendor            vendor used to normalize column names before comparing them
     * @param target              target column of the relationship being simplified
     * @param instance            the dataflow being simplified
     * @param starRelationTarget  when non-null, only sources named {@code "*"} or whose
     *                            normalized column name equals this column's are considered
     * @param sourceIdRelationMap relationships indexed by "&lt;target parent id&gt;.&lt;target id&gt;"
     * @param targetRelation      relationship whose sources are examined; nothing happens when
     *                            it or its source list is null
     * @param relationSources     output list of (source copy, relation types along the path)
     * @param pathTypes           relation types collected so far, most recent first
     * @param paths               keys of intermediate columns already expanded (cycle guard)
     * @param transforms          transforms collected so far along the path
     * @param candidateTables     candidate parent tables collected so far along the path
     */
    private void findStarSourceRelations(EDbVendor dbVendor, targetColumn target, dataflow instance,
            targetColumn starRelationTarget, Map<String, Set<relationship>> sourceIdRelationMap,
            relationship targetRelation, List<Pair<sourceColumn, List<String>>> relationSources,
            String[] pathTypes, Set<String> paths, Set<transform> transforms,
            Set<candidateTable> candidateTables) {
        if (targetRelation == null || targetRelation.getSources() == null) {
            return;
        }
        for (sourceColumn source : targetRelation.getSources()) {
            boolean starSource = "*".equals(source.getColumn());

            if (starRelationTarget != null && !starSource
                    && !DlineageUtil.getIdentifierNormalColumnName(starRelationTarget.getColumn(), dbVendor)
                            .equals(DlineageUtil.getIdentifierNormalColumnName(source.getColumn(), dbVendor))) {
                continue;
            }

            String sourceColumnId = source.getId();
            String sourceParentId = source.getParent_id();
            if (sourceParentId == null || sourceColumnId == null) {
                continue;
            }

            if (isTarget(instance, sourceParentId)) {
                // Transforms are attached in reverse of the order in which they were collected.
                List<transform> reversedTransforms = new ArrayList<transform>(transforms);
                Collections.reverse(reversedTransforms);

                sourceColumn sourceColumnCopy = DlineageUtil.copySourceColumn(source);
                for (transform collected : reversedTransforms) {
                    sourceColumnCopy.addTransform(collected);
                }
                for (candidateTable candidate : candidateTables) {
                    sourceColumnCopy.addCandidateParent(candidate);
                }

                // For struct-to-struct flows only keep sources whose last name segment
                // matches the last name segment of the target.
                if (Boolean.TRUE.equals(target.isStruct()) && Boolean.TRUE.equals(source.isStruct())) {
                    List<String> targetColumns = SQLUtil.parseNames(target.getColumn());
                    List<String> sourceColumns = SQLUtil.parseNames(source.getColumn());
                    String targetLeaf = DlineageUtil.getIdentifierNormalColumnName(
                            targetColumns.get(targetColumns.size() - 1), dbVendor);
                    String sourceLeaf = DlineageUtil.getIdentifierNormalColumnName(
                            sourceColumns.get(sourceColumns.size() - 1), dbVendor);
                    if (!targetLeaf.equals(sourceLeaf)) {
                        continue;
                    }
                }
                relationSources.add(new Pair<sourceColumn, List<String>>(sourceColumnCopy, Arrays.asList(pathTypes)));
                continue;
            }

            String sourceKey = sourceParentId + "." + sourceColumnId;
            Set<relationship> sourceRelations = sourceIdRelationMap.get(sourceKey);
            if (sourceRelations == null) {
                continue;
            }
            if (!paths.add(sourceKey)) {
                // Already expanded on this trace; stop here to avoid cycles.
                continue;
            }
            if (source.getTransforms() != null) {
                transforms.addAll(source.getTransforms());
            }
            if (source.getCandidateParents() != null) {
                candidateTables.addAll(source.getCandidateParents());
            }

            for (relationship relation : sourceRelations) {
                Set<candidateTable> candidateTablesCopy = new LinkedHashSet<candidateTable>(candidateTables);
                String[] types = new String[pathTypes.length + 1];
                types[0] = relation.getType();
                System.arraycopy(pathTypes, 0, types, 1, pathTypes.length);
                if (!starSource) {
                    Set<transform> transformsCopy = new LinkedHashSet<transform>(transforms);
                    findStarSourceRelations(dbVendor, target, instance, null, sourceIdRelationMap, relation,
                            relationSources, types, paths, transformsCopy, candidateTablesCopy);
                } else {
                    // NOTE(review): this branch passes the shared 'transforms' set rather than
                    // a copy, unlike the non-star branch. Kept as-is; confirm it is intended.
                    findStarSourceRelations(dbVendor, target, instance,
                            starRelationTarget == null ? targetRelation.getTarget() : starRelationTarget,
                            sourceIdRelationMap, relation, relationSources, types, paths, transforms,
                            candidateTablesCopy);
                }
            }
        }
    }

    /**
     * Tells whether the object with the given id is an end point of the simplified flow:
     * a table, view, stage, sequence, data source, database, schema, stream, path, variable,
     * or a result set that is not a function. Answers are cached per id until the next
     * {@link #removeFunction(dataflow, EDbVendor)} call.
     */
    private boolean isTarget(dataflow instance, String targetParentId) {
        Boolean cached = targetTables.get(targetParentId);
        if (cached != null) {
            return cached;
        }
        boolean endPoint = isTable(instance, targetParentId)
                || containsId(instance.getViews(), targetParentId)
                || containsId(instance.getStages(), targetParentId)
                || containsId(instance.getSequences(), targetParentId)
                || containsId(instance.getDatasources(), targetParentId)
                || containsId(instance.getDatabases(), targetParentId)
                || containsId(instance.getSchemas(), targetParentId)
                || containsId(instance.getStreams(), targetParentId)
                || containsId(instance.getPaths(), targetParentId)
                || containsId(instance.getVariables(), targetParentId)
                || isTargetResultSet(instance, targetParentId);
        targetTables.put(targetParentId, endPoint);
        return endPoint;
    }

    /**
     * Tells whether the id belongs to one of the dataflow's tables (case-insensitive match).
     * Ids of tables visited during the scan are recorded in the {@code isTables} cache.
     */
    private boolean isTable(dataflow instance, String targetParentId) {
        Boolean cached = isTables.get(targetParentId);
        if (cached != null) {
            return cached;
        }
        if (instance.getTables() != null) {
            for (table candidate : instance.getTables()) {
                isTables.put(candidate.getId(), true);
                if (candidate.getId().equalsIgnoreCase(targetParentId)) {
                    return true;
                }
            }
        }
        isTables.put(targetParentId, false);
        return false;
    }

    /**
     * Tells whether any entry of {@code items} has the given id, ignoring case.
     *
     * @param items list to search; may be null, in which case the answer is {@code false}
     * @param id    id to look for
     */
    private boolean containsId(List<? extends table> items, String id) {
        if (items == null) {
            return false;
        }
        for (table item : items) {
            if (item.getId().equalsIgnoreCase(id)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the id belongs to a result set that is not a function. The first result
     * set whose id matches (ignoring case) decides the answer.
     */
    private boolean isTargetResultSet(dataflow instance, String targetParent) {
        if (instance.getResultsets() != null) {
            for (table resultSet : instance.getResultsets()) {
                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
                    return !isFunction(resultSet);
                }
            }
        }
        return false;
    }

    /**
     * Reduces several type paths to one relation type name: each path is first collapsed with
     * {@link #getRelationType(List)}, then the {@code RelationshipType} constant with the
     * smallest ordinal wins, starting from {@code join}.
     */
    private String mergeRelationType(List<Pair<sourceColumn, List<String>>> typePaths) {
        RelationshipType merged = RelationshipType.join;
        for (Pair<sourceColumn, List<String>> typePath : typePaths) {
            RelationshipType candidate = RelationshipType.valueOf(getRelationType(typePath.second));
            // Relies on the declaration order of the project's RelationshipType enum.
            if (candidate.ordinal() < merged.ordinal()) {
                merged = candidate;
            }
        }
        return merged.name();
    }

    /**
     * Collapses the relation types seen along one path to a single type name, checking in
     * the order {@code join}, {@code fdr}, {@code frd}, {@code fddi} and falling back to
     * {@code fdd} when none of them is present.
     */
    private String getRelationType(List<String> typePaths) {
        if (typePaths.contains("join")) {
            return "join";
        }
        if (typePaths.contains("fdr")) {
            return "fdr";
        }
        if (typePaths.contains("frd")) {
            return "frd";
        }
        if (typePaths.contains("fddi")) {
            return "fddi";
        }
        return "fdd";
    }

}