001package gudusoft.gsqlparser.dlineage.util;
002
003import java.util.*;
004
005import gudusoft.gsqlparser.EDbVendor;
006import gudusoft.gsqlparser.dlineage.dataflow.model.RelationshipType;
007import gudusoft.gsqlparser.dlineage.dataflow.model.xml.*;
008import gudusoft.gsqlparser.util.Logger;
009import gudusoft.gsqlparser.util.LoggerFactory;
010import gudusoft.gsqlparser.util.SQLUtil;
011
012public class RemoveDataflowFunction {
013
014    private static final Logger logger = LoggerFactory.getLogger(DataflowRemoveHelper.class);
015    private Map<String, Boolean> isTables = new HashMap<String, Boolean>();
016    private Map<String, Boolean> targetTables = new HashMap<String, Boolean>();
017
018    public dataflow removeFunction(dataflow instance, EDbVendor dbVendor) {
019        if (instance.getResultsets() == null || instance.getResultsets().isEmpty()) {
020            return instance;
021        }
022        try {
023            targetTables.clear();
024            isTables.clear();
025            dataflow simple = new dataflow();
026            List<relationship> simpleRelations = new ArrayList<relationship>();
027            List<relationship> relations = instance.getRelationships();
028            if (relations != null && relations.size() > 0) {
029                Map<String, Set<relationship>> targetIdRelationMap = new HashMap<String, Set<relationship>>();
030                for (relationship relation : relations) {
031                    if (relation.getTarget() != null) {
032                        String key = relation.getTarget().getParent_id() + "." + relation.getTarget().getId();
033                        if (!targetIdRelationMap.containsKey(key)) {
034                            targetIdRelationMap.put(key, new HashSet<relationship>());
035                        }
036                        targetIdRelationMap.get(key).add(relation);
037                    }
038                }
039
040                long maxId = Long
041                        .parseLong(relations.get(relations.size() - 1).getId().split("\\-")[0].replace("_", "")) * 100;
042                for (int i = 0; i < relations.size(); i++) {
043                    relationship relationElem = relations.get(i);
044                    if (RelationshipType.call.name().equals(relationElem.getType())) {
045                        continue;
046                    }
047                    if (RelationshipType.er.name().equals(relationElem.getType())) {
048                        continue;
049                    }
050                    targetColumn target = relationElem.getTarget();
051                    String targetParent = target.getParent_id();
052                    if (isTarget(instance, targetParent)) {
053                        List<Pair<sourceColumn, List<String>>> relationSources = new ArrayList<Pair<sourceColumn, List<String>>>();
054                        findSourceRelations(dbVendor, target, instance, targetIdRelationMap, relationElem, relationSources,
055                                new String[]{relationElem.getType()});
056                        if (relationSources.size() > 0) {
057                            Map<sourceColumn, List<Pair<sourceColumn, List<String>>>> columnMap = new HashMap<sourceColumn, List<Pair<sourceColumn, List<String>>>>();
058                            for (Pair<sourceColumn, List<String>> item : relationSources) {
059                                if (!columnMap.containsKey(item.first)) {
060                                    columnMap.put(item.first, new ArrayList<Pair<sourceColumn, List<String>>>());
061                                }
062                                columnMap.get(item.first).add(item);
063                            }
064
065                            Iterator<sourceColumn> iter = columnMap.keySet().iterator();
066                            Map<String, List<sourceColumn>> relationSourceMap = new HashMap<String, List<sourceColumn>>();
067                            while (iter.hasNext()) {
068                                sourceColumn column = iter.next();
069                                String relationType = mergeRelationType(columnMap.get(column));
070                                if (!relationSourceMap.containsKey(relationType)) {
071                                    relationSourceMap.put(relationType, new ArrayList<sourceColumn>());
072                                }
073                                relationSourceMap.get(relationType).add(column);
074                            }
075
076                            Iterator<String> sourceIter = relationSourceMap.keySet().iterator();
077                            while (sourceIter.hasNext()) {
078                                String relationType = sourceIter.next();
079                                relationship simpleRelation = (relationship) relationElem.clone();
080                                simpleRelation.setSources(relationSourceMap.get(relationType));
081                                simpleRelation.setType(relationType);
082                                simpleRelation.setId(String.valueOf(++maxId));
083                                simpleRelations.add(simpleRelation);
084                            }
085                        }
086                    }
087                }
088            }
089            simple.setErrors(instance.getErrors());
090            simple.setPackages(instance.getPackages());
091            simple.setProcedures(instance.getProcedures());
092            simple.setProcesses(instance.getProcesses());
093            simple.setTables(instance.getTables());
094            simple.setViews(instance.getViews());
095            simple.setDatabases(instance.getDatabases());
096            simple.setSchemas(instance.getSchemas());
097            simple.setStages(instance.getStages());
098            simple.setSequences(instance.getSequences());
099            simple.setDatasources(instance.getDatasources());
100            simple.setStreams(instance.getStreams());
101            simple.setPaths(instance.getPaths());
102            simple.setVariables(instance.getVariables());
103            if (instance.getResultsets() != null) {
104                List<table> resultSets = new ArrayList<table>();
105                for (int i = 0; i < instance.getResultsets().size(); i++) {
106                    table resultSet = instance.getResultsets().get(i);
107                    if (!isFunction(resultSet)) {
108                        resultSets.add(resultSet);
109                    }
110                }
111                simple.setResultsets(resultSets);
112            }
113            simple.setRelationships(simpleRelations);
114            simple.setOrientation(instance.getOrientation());
115            return simple;
116        } catch (Exception e) {
117            logger.error("Remove dataflow function failed.", e);
118        }
119        return instance;
120    }
121
122    private boolean isFunction(table resultSet) {
123        if ("function".equals(resultSet.getType())) {
124            return true;
125        } else if ("resultset".equals(resultSet.getType()) && "function".equals(resultSet.getSubType())) {
126            return true;
127        }
128        return false;
129    }
130
131        private void findSourceRelations(EDbVendor dbVendor, targetColumn target, dataflow instance, Map<String, Set<relationship>> sourceIdRelationMap,
132                                                                         relationship targetRelation, List<Pair<sourceColumn, List<String>>> relationSources, String[] pathTypes) {
133                findStarSourceRelations(dbVendor, target, instance, null, sourceIdRelationMap, targetRelation, relationSources, pathTypes,
134                                new HashSet<String>(), new LinkedHashSet<transform>(), new LinkedHashSet<candidateTable>());
135        }
136
137    private void findStarSourceRelations(EDbVendor dbVendor, targetColumn target, dataflow instance, targetColumn starRelationTarget, Map<String, Set<relationship>> sourceIdRelationMap,
138                                         relationship targetRelation, List<Pair<sourceColumn, List<String>>> relationSources, String[] pathTypes,
139                                                                                 Set<String> paths, Set<transform> transforms, Set<candidateTable> candidateTables) {
140        if (targetRelation != null && targetRelation.getSources() != null) {
141            for (int i = 0; i < targetRelation.getSources().size(); i++) {
142                sourceColumn source = targetRelation.getSources().get(i);
143
144                if (starRelationTarget != null && !"*".equals(source.getColumn())
145                        && !DlineageUtil.getIdentifierNormalColumnName(starRelationTarget.getColumn(), dbVendor)
146                        .equals(DlineageUtil.getIdentifierNormalColumnName(source.getColumn(), dbVendor))) {
147                    continue;
148                }
149
150                String sourceColumnId = source.getId();
151                String sourceParentId = source.getParent_id();
152                if (sourceParentId == null || sourceColumnId == null) {
153                    continue;
154                }
155                if (isTarget(instance, sourceParentId)) {
156                    List<transform> transforms2 = new ArrayList<transform>(transforms.size());
157                    transforms2.addAll(transforms);
158                    Collections.reverse(transforms2);
159                    
160                    List<candidateTable> candidateTables2 = new ArrayList<candidateTable>(candidateTables.size());
161                    candidateTables2.addAll(candidateTables);
162                    
163                    sourceColumn sourceColumnCopy = DlineageUtil.copySourceColumn(source);
164                    for (transform t : transforms2) {
165                        sourceColumnCopy.addTransform(t);
166                    }
167                    for (candidateTable t : candidateTables2) {
168                        sourceColumnCopy.addCandidateParent(t);
169                    }
170                    
171                    if (Boolean.TRUE.equals(target.isStruct()) && Boolean.TRUE.equals(source.isStruct())) {
172                        List<String> targetColumns = SQLUtil.parseNames(target.getColumn());
173                        List<String> sourceColumns = SQLUtil.parseNames(source.getColumn());
174                        if (!DlineageUtil.getIdentifierNormalColumnName(targetColumns.get(targetColumns.size() - 1), dbVendor)
175                                .equals(DlineageUtil.getIdentifierNormalColumnName(sourceColumns.get(sourceColumns.size() - 1), dbVendor))) {
176                            continue;
177                        }
178                    }
179                    relationSources.add(new Pair<sourceColumn, List<String>>(sourceColumnCopy, Arrays.asList(pathTypes)));
180                } else {
181                    Set<relationship> sourceRelations = sourceIdRelationMap
182                            .get(source.getParent_id() + "." + source.getId());
183                    if (sourceRelations != null) {
184                        if (paths.contains(source.getParent_id() + "." + source.getId())) {
185                            continue;
186                        } else {
187                            paths.add(source.getParent_id() + "." + source.getId());
188                            if (source.getTransforms() != null) {
189                                transforms.addAll(source.getTransforms());
190                            }
191                            if (source.getCandidateParents() != null) {
192                                candidateTables.addAll(source.getCandidateParents());
193                            }
194                        }
195                        if (sourceRelations != null) {
196                            for (relationship relation : sourceRelations) {
197                                LinkedHashSet<transform> transforms2 = new LinkedHashSet<transform>(transforms.size());
198                                transforms2.addAll(transforms);
199                                LinkedHashSet<candidateTable> candidateTables2 = new LinkedHashSet<candidateTable>(candidateTables.size());
200                                candidateTables2.addAll(candidateTables);
201                                String[] types = new String[pathTypes.length + 1];
202                                types[0] = relation.getType();
203                                System.arraycopy(pathTypes, 0, types, 1, pathTypes.length);
204                                if (!"*".equals(source.getColumn())) {
205                                    findStarSourceRelations(dbVendor, target, instance, null, sourceIdRelationMap, relation, relationSources,
206                                            types, paths, transforms2, candidateTables2);
207                                } else {
208                                    findStarSourceRelations(dbVendor, target, instance,
209                                            starRelationTarget == null ? targetRelation.getTarget() : starRelationTarget,
210                                            sourceIdRelationMap, relation, relationSources, types, paths, transforms, candidateTables2);
211                                }
212                            }
213                        }
214                    }
215                }
216            }
217        }
218    }
219
220    private boolean isTarget(dataflow instance, String targetParentId) {
221        if (targetTables.containsKey(targetParentId))
222            return targetTables.get(targetParentId);
223        if (isTable(instance, targetParentId)) {
224            targetTables.put(targetParentId, true);
225            return true;
226        } else if (isView(instance, targetParentId)) {
227            targetTables.put(targetParentId, true);
228            return true;
229        } else if (isStage(instance, targetParentId)) {
230            targetTables.put(targetParentId, true);
231            return true;
232        } else if (isSequence(instance, targetParentId)) {
233            targetTables.put(targetParentId, true);
234            return true;
235        } else if (isDataSource(instance, targetParentId)) {
236            targetTables.put(targetParentId, true);
237            return true;
238        } else if (isDatabase(instance, targetParentId)) {
239            targetTables.put(targetParentId, true);
240            return true;
241        } else if (isSchema(instance, targetParentId)) {
242            targetTables.put(targetParentId, true);
243            return true;
244        } else if (isStream(instance, targetParentId)) {
245            targetTables.put(targetParentId, true);
246            return true;
247        } else if (isFile(instance, targetParentId)) {
248            targetTables.put(targetParentId, true);
249            return true;
250        } else if (isVariable(instance, targetParentId)) {
251            targetTables.put(targetParentId, true);
252            return true;
253        } else if (isTargetResultSet(instance, targetParentId)) {
254            targetTables.put(targetParentId, true);
255            return true;
256        }
257        targetTables.put(targetParentId, false);
258        return false;
259    }
260
261    private boolean isTable(dataflow instance, String targetParentId) {
262        if (isTables.containsKey(targetParentId)) {
263            return isTables.get(targetParentId);
264        }
265        if (instance.getTables() != null) {
266            for (int i = 0; i < instance.getTables().size(); i++) {
267                table resultSet = instance.getTables().get(i);
268                isTables.put(resultSet.getId(), true);
269                if (resultSet.getId().equalsIgnoreCase(targetParentId)) {
270                    return true;
271                }
272            }
273        }
274        isTables.put(targetParentId, false);
275        return false;
276    }
277
278    private boolean isView(dataflow instance, String targetParent) {
279        if (instance.getViews() != null) {
280            for (int i = 0; i < instance.getViews().size(); i++) {
281                table resultSet = instance.getViews().get(i);
282                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
283                    return true;
284                }
285            }
286        }
287        return false;
288    }
289
290    private boolean isStage(dataflow instance, String targetParent) {
291        if (instance.getStages() != null) {
292            for (int i = 0; i < instance.getStages().size(); i++) {
293                table resultSet = instance.getStages().get(i);
294                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
295                    return true;
296                }
297            }
298        }
299        return false;
300    }
301
302    private boolean isSequence(dataflow instance, String targetParent) {
303        if (instance.getSequences() != null) {
304            for (int i = 0; i < instance.getSequences().size(); i++) {
305                table resultSet = instance.getSequences().get(i);
306                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
307                    return true;
308                }
309            }
310        }
311        return false;
312    }
313
314    private boolean isDataSource(dataflow instance, String targetParent) {
315        if (instance.getDatasources() != null) {
316            for (int i = 0; i < instance.getDatasources().size(); i++) {
317                table resultSet = instance.getDatasources().get(i);
318                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
319                    return true;
320                }
321            }
322        }
323        return false;
324    }
325
326    private boolean isDatabase(dataflow instance, String targetParent) {
327        if (instance.getDatabases() != null) {
328            for (int i = 0; i < instance.getDatabases().size(); i++) {
329                table resultSet = instance.getDatabases().get(i);
330                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
331                    return true;
332                }
333            }
334        }
335        return false;
336    }
337
338    private boolean isSchema(dataflow instance, String targetParent) {
339        if (instance.getSchemas() != null) {
340            for (int i = 0; i < instance.getSchemas().size(); i++) {
341                table resultSet = instance.getSchemas().get(i);
342                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
343                    return true;
344                }
345            }
346        }
347        return false;
348    }
349
350    private boolean isStream(dataflow instance, String targetParent) {
351        if (instance.getStreams() != null) {
352            for (int i = 0; i < instance.getStreams().size(); i++) {
353                table resultSet = instance.getStreams().get(i);
354                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
355                    return true;
356                }
357            }
358        }
359        return false;
360    }
361
362    private boolean isFile(dataflow instance, String targetParent) {
363        if (instance.getPaths() != null) {
364            for (int i = 0; i < instance.getPaths().size(); i++) {
365                table resultSet = instance.getPaths().get(i);
366                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
367                    return true;
368                }
369            }
370        }
371        return false;
372    }
373
374    private boolean isVariable(dataflow instance, String targetParent) {
375        if (instance.getVariables() != null) {
376            for (int i = 0; i < instance.getVariables().size(); i++) {
377                table resultSet = instance.getVariables().get(i);
378                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
379                    return true;
380                }
381            }
382        }
383        return false;
384    }
385
386    private boolean isTargetResultSet(dataflow instance, String targetParent) {
387        if (instance.getResultsets() != null) {
388            for (int i = 0; i < instance.getResultsets().size(); i++) {
389                table resultSet = instance.getResultsets().get(i);
390                if (resultSet.getId().equalsIgnoreCase(targetParent)) {
391                    return !isFunction(resultSet);
392                }
393            }
394        }
395        return false;
396    }
397
398    private String mergeRelationType(List<Pair<sourceColumn, List<String>>> typePaths) {
399        RelationshipType relationType = RelationshipType.join;
400        for (int i = 0; i < typePaths.size(); i++) {
401            List<String> path = typePaths.get(i).second;
402            RelationshipType type = RelationshipType.valueOf(getRelationType(path));
403            if (type.ordinal() < relationType.ordinal()) {
404                relationType = type;
405            }
406        }
407        return relationType.name();
408    }
409
410    private String getRelationType(List<String> typePaths) {
411        if (typePaths.contains("join"))
412            return "join";
413        if (typePaths.contains("fdr"))
414            return "fdr";
415        if (typePaths.contains("frd"))
416            return "frd";
417        if (typePaths.contains("fddi"))
418            return "fddi";
419        return "fdd";
420    }
421
422}