001package gudusoft.gsqlparser.common.structured.adapters; 002 003import gudusoft.gsqlparser.EDbVendor; 004import gudusoft.gsqlparser.EExpressionType; 005import gudusoft.gsqlparser.ELiteralType; 006import gudusoft.gsqlparser.common.structured.StructuredAdapterContext; 007import gudusoft.gsqlparser.common.structured.StructuredArrayType; 008import gudusoft.gsqlparser.common.structured.StructuredColumnPath; 009import gudusoft.gsqlparser.common.structured.StructuredDataflowAdapter; 010import gudusoft.gsqlparser.common.structured.StructuredDataflowDescriptor; 011import gudusoft.gsqlparser.common.structured.StructuredExpansion; 012import gudusoft.gsqlparser.common.structured.StructuredExpansionKind; 013import gudusoft.gsqlparser.common.structured.StructuredFieldBinding; 014import gudusoft.gsqlparser.common.structured.StructuredSchemaDialect; 015import gudusoft.gsqlparser.common.structured.StructuredSchemaParser; 016import gudusoft.gsqlparser.common.structured.StructuredStructField; 017import gudusoft.gsqlparser.common.structured.StructuredStructType; 018import gudusoft.gsqlparser.common.structured.StructuredType; 019import gudusoft.gsqlparser.common.structured.StructuredValueSource; 020import gudusoft.gsqlparser.nodes.TConstant; 021import gudusoft.gsqlparser.nodes.TExpression; 022import gudusoft.gsqlparser.nodes.TExpressionList; 023import gudusoft.gsqlparser.nodes.TFunctionCall; 024import gudusoft.gsqlparser.nodes.TObjectName; 025import gudusoft.gsqlparser.nodes.TParseTreeNode; 026 027/** 028 * Recognizes the Spark SQL pattern 029 * {@code explode(from_json(<column>, '<literal ARRAY<STRUCT<...>> schema>'))} 030 * (also {@code explode_outer}) and produces a vendor-neutral 031 * {@link StructuredDataflowDescriptor}. Unsupported variants 032 * (dynamic schema column, {@code schema_of_json(...)}, MAP element fields, 033 * {@code posexplode}, {@code inline}, etc.) return {@code null} so the 034 * existing generic function lineage runs unchanged. 035 * 036 * <p>The same syntactic pattern is supported by Hive (which shares Spark's 037 * {@code from_json} + {@code explode} UDFs and ARRAY/STRUCT type-string 038 * grammar), so the adapter also matches {@link EDbVendor#dbvhive}. 039 */ 040public final class SparkFromJsonExplodeAdapter implements StructuredDataflowAdapter { 041 042 @Override 043 public StructuredDataflowDescriptor describe(TParseTreeNode node, StructuredAdapterContext context) { 044 if (context == null) return null; 045 EDbVendor v = context.getDbVendor(); 046 if (v != EDbVendor.dbvsparksql && v != EDbVendor.dbvhive) { 047 return null; 048 } 049 if (!(node instanceof TFunctionCall)) { 050 return null; 051 } 052 TFunctionCall outer = (TFunctionCall) node; 053 String outerName = functionNameOf(outer); 054 if (outerName == null) return null; 055 056 boolean isOuterVariant; 057 if (outerName.equalsIgnoreCase("explode")) { 058 isOuterVariant = false; 059 } else if (outerName.equalsIgnoreCase("explode_outer")) { 060 isOuterVariant = true; 061 } else { 062 return null; 063 } 064 065 TExpressionList outerArgs = outer.getArgs(); 066 if (outerArgs == null || outerArgs.size() != 1) { 067 return null; 068 } 069 TExpression arg0 = outerArgs.getExpression(0); 070 if (arg0 == null || arg0.getExpressionType() != EExpressionType.function_t) { 071 return null; 072 } 073 TFunctionCall inner = arg0.getFunctionCall(); 074 if (inner == null) return null; 075 String innerName = functionNameOf(inner); 076 if (innerName == null || !innerName.equalsIgnoreCase("from_json")) { 077 return null; 078 } 079 TExpressionList innerArgs = inner.getArgs(); 080 if (innerArgs == null || innerArgs.size() < 2) { 081 return null; 082 } 083 TExpression sourceArg = innerArgs.getExpression(0); 084 TExpression schemaArg = innerArgs.getExpression(1); 085 if (sourceArg == null || schemaArg == null) { 086 return null; 087 } 088 if (sourceArg.getExpressionType() != EExpressionType.simple_object_name_t) { 089 return null; 090 } 091 TObjectName sourceColumn = sourceArg.getObjectOperand(); 092 if (sourceColumn == null) return null; 093 094 if (schemaArg.getExpressionType() != EExpressionType.simple_constant_t) { 095 return null; 096 } 097 TConstant schemaConst = schemaArg.getConstantOperand(); 098 if (schemaConst == null || schemaConst.getLiteralType() != ELiteralType.etString) { 099 return null; 100 } 101 String schemaText = schemaConst.getValue(); 102 if (schemaText == null || schemaText.isEmpty()) { 103 return null; 104 } 105 106 StructuredType parsed; 107 try { 108 parsed = StructuredSchemaParser.parse(schemaText, StructuredSchemaDialect.SPARK_SQL_TYPE_STRING); 109 } catch (RuntimeException ex) { 110 return null; 111 } 112 if (!(parsed instanceof StructuredArrayType)) { 113 return null; 114 } 115 StructuredArrayType arrayType = (StructuredArrayType) parsed; 116 if (!(arrayType.getElementType() instanceof StructuredStructType)) { 117 return null; 118 } 119 StructuredStructType structType = (StructuredStructType) arrayType.getElementType(); 120 121 String rootColumnName = sourceColumn.getColumnNameOnly(); 122 if (rootColumnName == null || rootColumnName.isEmpty()) { 123 rootColumnName = sourceColumn.toString(); 124 } 125 String sourceAlias = sourceColumn.getTableString(); 126 if (sourceAlias != null && sourceAlias.isEmpty()) { 127 sourceAlias = null; 128 } 129 130 StructuredValueSource source = new StructuredValueSource(sourceColumn, sourceAlias, rootColumnName); 131 StructuredColumnPath elementPath = StructuredColumnPath.of(rootColumnName).arrayElement().build(); 132 StructuredExpansion expansion = new StructuredExpansion( 133 StructuredExpansionKind.ARRAY_ELEMENT, elementPath, isOuterVariant); 134 135 StructuredDataflowDescriptor.Builder b = StructuredDataflowDescriptor.builder() 136 .dialect(v) 137 .syntaxNode(outer) 138 .source(source) 139 .sourceType(arrayType) 140 .expansion(expansion); 141 142 for (StructuredStructField f : structType.getFields()) { 143 StructuredColumnPath fieldPath = StructuredColumnPath.of(rootColumnName) 144 .arrayElement() 145 .field(f.getName()) 146 .build(); 147 b.addFieldBinding(new StructuredFieldBinding(f.getName(), fieldPath, f.getType())); 148 } 149 return b.build(); 150 } 151 152 private static String functionNameOf(TFunctionCall call) { 153 if (call == null || call.getFunctionName() == null) return null; 154 return call.getFunctionName().toString(); 155 } 156}