001package gudusoft.gsqlparser.common.structured.adapters;
002
003import gudusoft.gsqlparser.EDbVendor;
004import gudusoft.gsqlparser.EExpressionType;
005import gudusoft.gsqlparser.ELiteralType;
006import gudusoft.gsqlparser.common.structured.StructuredAdapterContext;
007import gudusoft.gsqlparser.common.structured.StructuredArrayType;
008import gudusoft.gsqlparser.common.structured.StructuredColumnPath;
009import gudusoft.gsqlparser.common.structured.StructuredDataflowAdapter;
010import gudusoft.gsqlparser.common.structured.StructuredDataflowDescriptor;
011import gudusoft.gsqlparser.common.structured.StructuredExpansion;
012import gudusoft.gsqlparser.common.structured.StructuredExpansionKind;
013import gudusoft.gsqlparser.common.structured.StructuredFieldBinding;
014import gudusoft.gsqlparser.common.structured.StructuredSchemaDialect;
015import gudusoft.gsqlparser.common.structured.StructuredSchemaParser;
016import gudusoft.gsqlparser.common.structured.StructuredStructField;
017import gudusoft.gsqlparser.common.structured.StructuredStructType;
018import gudusoft.gsqlparser.common.structured.StructuredType;
019import gudusoft.gsqlparser.common.structured.StructuredValueSource;
020import gudusoft.gsqlparser.nodes.TConstant;
021import gudusoft.gsqlparser.nodes.TExpression;
022import gudusoft.gsqlparser.nodes.TExpressionList;
023import gudusoft.gsqlparser.nodes.TFunctionCall;
024import gudusoft.gsqlparser.nodes.TObjectName;
025import gudusoft.gsqlparser.nodes.TParseTreeNode;
026
027/**
028 * Recognizes the Spark SQL pattern
029 * {@code explode(from_json(<column>, '<literal ARRAY<STRUCT<...>> schema>'))}
030 * (also {@code explode_outer}) and produces a vendor-neutral
031 * {@link StructuredDataflowDescriptor}. Unsupported variants
032 * (dynamic schema column, {@code schema_of_json(...)}, MAP element fields,
033 * {@code posexplode}, {@code inline}, etc.) return {@code null} so the
034 * existing generic function lineage runs unchanged.
035 *
036 * <p>The same syntactic pattern is supported by Hive (which shares Spark's
037 * {@code from_json} + {@code explode} UDFs and ARRAY/STRUCT type-string
038 * grammar), so the adapter also matches {@link EDbVendor#dbvhive}.
039 */
040public final class SparkFromJsonExplodeAdapter implements StructuredDataflowAdapter {
041
042    @Override
043    public StructuredDataflowDescriptor describe(TParseTreeNode node, StructuredAdapterContext context) {
044        if (context == null) return null;
045        EDbVendor v = context.getDbVendor();
046        if (v != EDbVendor.dbvsparksql && v != EDbVendor.dbvhive) {
047            return null;
048        }
049        if (!(node instanceof TFunctionCall)) {
050            return null;
051        }
052        TFunctionCall outer = (TFunctionCall) node;
053        String outerName = functionNameOf(outer);
054        if (outerName == null) return null;
055
056        boolean isOuterVariant;
057        if (outerName.equalsIgnoreCase("explode")) {
058            isOuterVariant = false;
059        } else if (outerName.equalsIgnoreCase("explode_outer")) {
060            isOuterVariant = true;
061        } else {
062            return null;
063        }
064
065        TExpressionList outerArgs = outer.getArgs();
066        if (outerArgs == null || outerArgs.size() != 1) {
067            return null;
068        }
069        TExpression arg0 = outerArgs.getExpression(0);
070        if (arg0 == null || arg0.getExpressionType() != EExpressionType.function_t) {
071            return null;
072        }
073        TFunctionCall inner = arg0.getFunctionCall();
074        if (inner == null) return null;
075        String innerName = functionNameOf(inner);
076        if (innerName == null || !innerName.equalsIgnoreCase("from_json")) {
077            return null;
078        }
079        TExpressionList innerArgs = inner.getArgs();
080        if (innerArgs == null || innerArgs.size() < 2) {
081            return null;
082        }
083        TExpression sourceArg = innerArgs.getExpression(0);
084        TExpression schemaArg = innerArgs.getExpression(1);
085        if (sourceArg == null || schemaArg == null) {
086            return null;
087        }
088        if (sourceArg.getExpressionType() != EExpressionType.simple_object_name_t) {
089            return null;
090        }
091        TObjectName sourceColumn = sourceArg.getObjectOperand();
092        if (sourceColumn == null) return null;
093
094        if (schemaArg.getExpressionType() != EExpressionType.simple_constant_t) {
095            return null;
096        }
097        TConstant schemaConst = schemaArg.getConstantOperand();
098        if (schemaConst == null || schemaConst.getLiteralType() != ELiteralType.etString) {
099            return null;
100        }
101        String schemaText = schemaConst.getValue();
102        if (schemaText == null || schemaText.isEmpty()) {
103            return null;
104        }
105
106        StructuredType parsed;
107        try {
108            parsed = StructuredSchemaParser.parse(schemaText, StructuredSchemaDialect.SPARK_SQL_TYPE_STRING);
109        } catch (RuntimeException ex) {
110            return null;
111        }
112        if (!(parsed instanceof StructuredArrayType)) {
113            return null;
114        }
115        StructuredArrayType arrayType = (StructuredArrayType) parsed;
116        if (!(arrayType.getElementType() instanceof StructuredStructType)) {
117            return null;
118        }
119        StructuredStructType structType = (StructuredStructType) arrayType.getElementType();
120
121        String rootColumnName = sourceColumn.getColumnNameOnly();
122        if (rootColumnName == null || rootColumnName.isEmpty()) {
123            rootColumnName = sourceColumn.toString();
124        }
125        String sourceAlias = sourceColumn.getTableString();
126        if (sourceAlias != null && sourceAlias.isEmpty()) {
127            sourceAlias = null;
128        }
129
130        StructuredValueSource source = new StructuredValueSource(sourceColumn, sourceAlias, rootColumnName);
131        StructuredColumnPath elementPath = StructuredColumnPath.of(rootColumnName).arrayElement().build();
132        StructuredExpansion expansion = new StructuredExpansion(
133                StructuredExpansionKind.ARRAY_ELEMENT, elementPath, isOuterVariant);
134
135        StructuredDataflowDescriptor.Builder b = StructuredDataflowDescriptor.builder()
136                .dialect(v)
137                .syntaxNode(outer)
138                .source(source)
139                .sourceType(arrayType)
140                .expansion(expansion);
141
142        for (StructuredStructField f : structType.getFields()) {
143            StructuredColumnPath fieldPath = StructuredColumnPath.of(rootColumnName)
144                    .arrayElement()
145                    .field(f.getName())
146                    .build();
147            b.addFieldBinding(new StructuredFieldBinding(f.getName(), fieldPath, f.getType()));
148        }
149        return b.build();
150    }
151
152    private static String functionNameOf(TFunctionCall call) {
153        if (call == null || call.getFunctionName() == null) return null;
154        return call.getFunctionName().toString();
155    }
156}