Skip to content

Commit 9a9f271

Browse files
yingjianwu98 (Yingjian Wu)
authored
revert jdbc change and fieldName fidelity (#590)
Co-authored-by: Yingjian Wu <[email protected]>
1 parent ea1c8c1 commit 9a9f271

File tree

5 files changed

+90
-334
lines changed

5 files changed

+90
-334
lines changed

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ configure(javaProjects) {
146146
dependency("org.codehaus.gpars:gpars:1.2.1")
147147
/**es 5.4.1 dependencies*/
148148
dependency("org.elasticsearch.client:transport:5.4.1")
149-
dependency("net.snowflake:snowflake-jdbc:3.15.0")
149+
dependency("net.snowflake:snowflake-jdbc:3.4.2")
150150
dependency("com.esotericsoftware.kryo:kryo:2.22")
151151
dependency("org.apache.iceberg:iceberg-spark-runtime:${iceberg_version}")
152152
dependency("com.datastax.cassandra:cassandra-driver-core:3.7.2")

metacat-connector-hive/src/main/java/com/netflix/metacat/connector/hive/converters/HiveTypeConverter.java

Lines changed: 87 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,20 @@
3131
import com.netflix.metacat.common.type.VarcharType;
3232
import lombok.extern.slf4j.Slf4j;
3333
import org.apache.hadoop.hive.serde.serdeConstants;
34+
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
35+
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
36+
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
37+
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
3438
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
35-
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
36-
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
37-
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
38-
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
39-
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
40-
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
39+
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
40+
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
41+
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
4142
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
42-
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
43+
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
44+
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
45+
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
4346
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
47+
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
4448
import org.apache.iceberg.PartitionField;
4549
import org.apache.iceberg.Schema;
4650
import org.apache.iceberg.types.Types;
@@ -74,20 +78,24 @@ public class HiveTypeConverter implements ConnectorTypeConverter {
7478
private static final Pattern DECIMAL_TYPE
7579
= Pattern.compile(DECIMAL_WITH_SCALE + "|" + DECIMAL_WITH_SCALE_AND_PRECISION, Pattern.CASE_INSENSITIVE);
7680

77-
private static Type getPrimitiveType(final TypeInfo typeInfo) {
78-
final PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
81+
private static Type getPrimitiveType(final ObjectInspector fieldInspector) {
82+
final PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) fieldInspector)
83+
.getPrimitiveCategory();
7984
if (HiveTypeMapping.getHIVE_TO_CANONICAL().containsKey(primitiveCategory.name())) {
8085
return HiveTypeMapping.getHIVE_TO_CANONICAL().get(primitiveCategory.name());
8186
}
8287
switch (primitiveCategory) {
8388
case DECIMAL:
84-
final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
89+
final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) ((PrimitiveObjectInspector) fieldInspector)
90+
.getTypeInfo();
8591
return DecimalType.createDecimalType(decimalTypeInfo.precision(), decimalTypeInfo.getScale());
8692
case CHAR:
87-
final int cLength = ((CharTypeInfo) typeInfo).getLength();
93+
final int cLength = ((CharTypeInfo) ((PrimitiveObjectInspector)
94+
fieldInspector).getTypeInfo()).getLength();
8895
return CharType.createCharType(cLength);
8996
case VARCHAR:
90-
final int vLength = ((VarcharTypeInfo) typeInfo).getLength();
97+
final int vLength = ((VarcharTypeInfo) ((PrimitiveObjectInspector) fieldInspector)
98+
.getTypeInfo()).getLength();
9199
return VarcharType.createVarcharType(vLength);
92100
default:
93101
return null;
@@ -98,7 +106,17 @@ private static Type getPrimitiveType(final TypeInfo typeInfo) {
98106
public Type toMetacatType(final String type) {
99107
// Hack to fix presto "varchar" type coming in with no length which is required by Hive.
100108
final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(sanitizeType(type));
101-
return getCanonicalType(typeInfo);
109+
ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
110+
// The standard struct object inspector forces field names to lower case, however in Metacat we need to preserve
111+
// the original case of the struct fields so we wrap it with our wrapper to force the fieldNames to keep
112+
// their original case
113+
if (typeInfo.getCategory().equals(ObjectInspector.Category.STRUCT)) {
114+
final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
115+
final StandardStructObjectInspector objectInspector = (StandardStructObjectInspector) oi;
116+
oi = new HiveTypeConverter.SameCaseStandardStructObjectInspector(
117+
structTypeInfo.getAllStructFieldNames(), objectInspector);
118+
}
119+
return getCanonicalType(oi);
102120
}
103121

104122
/**
@@ -287,48 +305,43 @@ public static String sanitizeType(final String type) {
287305
/**
288306
* Returns the canonical type.
289307
*
290-
* @param typeInfo typeInfo
291-
* @return Metacat Type
308+
* @param fieldInspector inspector
309+
* @return type
292310
*/
293-
Type getCanonicalType(final TypeInfo typeInfo) {
294-
switch (typeInfo.getCategory()) {
311+
Type getCanonicalType(final ObjectInspector fieldInspector) {
312+
switch (fieldInspector.getCategory()) {
295313
case PRIMITIVE:
296-
return getPrimitiveType(typeInfo);
314+
return getPrimitiveType(fieldInspector);
297315
case MAP:
298-
final MapTypeInfo mapTypeInfo =
299-
TypeUtils.checkType(typeInfo, MapTypeInfo.class, "typeInfo");
300-
final Type keyType = getCanonicalType(mapTypeInfo.getMapKeyTypeInfo());
301-
final Type valueType = getCanonicalType(mapTypeInfo.getMapValueTypeInfo());
316+
final MapObjectInspector mapObjectInspector =
317+
TypeUtils.checkType(fieldInspector, MapObjectInspector.class,
318+
"fieldInspector");
319+
final Type keyType = getCanonicalType(mapObjectInspector.getMapKeyObjectInspector());
320+
final Type valueType = getCanonicalType(mapObjectInspector.getMapValueObjectInspector());
302321
if (keyType == null || valueType == null) {
303322
return null;
304323
}
305324
return TypeRegistry.getTypeRegistry().getParameterizedType(TypeEnum.MAP,
306325
ImmutableList.of(keyType.getTypeSignature(), valueType.getTypeSignature()), ImmutableList.of());
307326
case LIST:
308-
final ListTypeInfo listTypeInfo =
309-
TypeUtils.checkType(typeInfo, ListTypeInfo.class, "typeInfo");
327+
final ListObjectInspector listObjectInspector =
328+
TypeUtils.checkType(fieldInspector, ListObjectInspector.class,
329+
"fieldInspector");
310330
final Type elementType =
311-
getCanonicalType(listTypeInfo.getListElementTypeInfo());
331+
getCanonicalType(listObjectInspector.getListElementObjectInspector());
312332
if (elementType == null) {
313333
return null;
314334
}
315335
return TypeRegistry.getTypeRegistry().getParameterizedType(TypeEnum.ARRAY,
316336
ImmutableList.of(elementType.getTypeSignature()), ImmutableList.of());
317337
case STRUCT:
318-
final StructTypeInfo structTypeInfo =
319-
TypeUtils.checkType(typeInfo, StructTypeInfo.class, "typeInfo");
320-
// Hive struct type infos
321-
final List<String> structFieldNames = structTypeInfo.getAllStructFieldNames();
322-
final List<TypeInfo> structFieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
323-
final int structInfoCounts = structFieldNames.size();
324-
325-
// Metacat canonical type infos
326-
final List<TypeSignature> fieldTypes = new ArrayList<>(structInfoCounts);
327-
final List<Object> fieldNames = new ArrayList<>(structInfoCounts);
328-
329-
for (int i = 0; i < structInfoCounts; i++) {
330-
fieldNames.add(structFieldNames.get(i));
331-
final Type fieldType = getCanonicalType(structFieldTypeInfos.get(i));
338+
final StructObjectInspector structObjectInspector =
339+
TypeUtils.checkType(fieldInspector, StructObjectInspector.class, "fieldInspector");
340+
final List<TypeSignature> fieldTypes = new ArrayList<>();
341+
final List<Object> fieldNames = new ArrayList<>();
342+
for (StructField field : structObjectInspector.getAllStructFieldRefs()) {
343+
fieldNames.add(field.getFieldName());
344+
final Type fieldType = getCanonicalType(field.getFieldObjectInspector());
332345
if (fieldType == null) {
333346
return null;
334347
}
@@ -337,8 +350,42 @@ Type getCanonicalType(final TypeInfo typeInfo) {
337350
return TypeRegistry.getTypeRegistry()
338351
.getParameterizedType(TypeEnum.ROW, fieldTypes, fieldNames);
339352
default:
340-
log.info("Currently unsupported type {}, returning Unknown type", typeInfo.getTypeName());
353+
log.info("Currently unsupported type {}, returning Unknown type", fieldInspector.getTypeName());
341354
return BaseType.UNKNOWN;
342355
}
343356
}
357+
358+
// This is protected and extends StandardStructObjectInspector so it can reference MyField
359+
protected static class SameCaseStandardStructObjectInspector extends StandardStructObjectInspector {
360+
private final List<String> realFieldNames;
361+
private final StandardStructObjectInspector structObjectInspector;
362+
363+
public SameCaseStandardStructObjectInspector(final List<String> realFieldNames,
364+
final StandardStructObjectInspector structObjectInspector) {
365+
this.realFieldNames = realFieldNames;
366+
this.structObjectInspector = structObjectInspector;
367+
}
368+
369+
@Override
370+
public List<? extends StructField> getAllStructFieldRefs() {
371+
return structObjectInspector.getAllStructFieldRefs()
372+
.stream()
373+
.map(structField -> (MyField) structField)
374+
.map(field -> new HiveTypeConverter.
375+
SameCaseStandardStructObjectInspector.SameCaseMyField(field.getFieldID(),
376+
realFieldNames.get(field.getFieldID()),
377+
field.getFieldObjectInspector(), field.getFieldComment()))
378+
.collect(Collectors.toList());
379+
}
380+
381+
protected static class SameCaseMyField extends MyField {
382+
public SameCaseMyField(final int fieldID, final String fieldName,
383+
final ObjectInspector fieldObjectInspector,
384+
final String fieldComment) {
385+
super(fieldID, fieldName, fieldObjectInspector, fieldComment);
386+
// Since super lower cases fieldName, this is to restore the original case
387+
this.fieldName = fieldName;
388+
}
389+
}
390+
}
344391
}

metacat-connector-hive/src/test/groovy/com/netflix/metacat/connector/hive/converters/HiveTypeConverterSpec.groovy

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,6 @@ class HiveTypeConverterSpec extends Specification {
119119
"struct<prediction_date:int,lower_confidence_amt:double,upper_confidence_amt:double,model_short_name:string>",
120120
"struct<prediction_date:int,lower_confidence_amt:int,upper_confidence_amt:int,model_short_name:string>",
121121
"struct<prediction_date:int,prediction_source:string>",
122-
123-
// Nested Type with UpperCase
124-
'array<struct<date:string,countryCodes:array<string>,source:string>>',
125-
"struct<Field3:struct<Nested_Field1:bigint,Nested_Field2:bigint>>",
126-
"struct<Field1:bigint,Field2:bigint,field3:struct<NESTED_Field1:bigint,NesteD_Field2:bigint>>"
127122
]
128123
}
129124

@@ -223,10 +218,6 @@ class HiveTypeConverterSpec extends Specification {
223218
"struct<prediction_date:int,lower_confidence_amt:double,upper_confidence_amt:double,model_short_name:string>" || "struct<prediction_date:int,lower_confidence_amt:double,upper_confidence_amt:double,model_short_name:string>"
224219
"struct<prediction_date:int,lower_confidence_amt:int,upper_confidence_amt:int,model_short_name:string>" || "struct<prediction_date:int,lower_confidence_amt:int,upper_confidence_amt:int,model_short_name:string>"
225220
"struct<prediction_date:int,prediction_source:string>" || "struct<prediction_date:int,prediction_source:string>"
226-
227-
'array<struct<field2:decimal(38 ),countryCodes:array<string>,source:string>>' || 'array<struct<field2:decimal(38),countryCodes:array<string>,source:string>>'
228-
"struct<Field3:struct<Nested_Field1:bigint,Nested_Field2:bigint,Nested_FIELD3:decimal( 38, 9)>>" || "struct<Field3:struct<Nested_Field1:bigint,Nested_Field2:bigint,Nested_FIELD3:decimal(38,9)>>"
229-
"struct<Field1:decimal (38,9 ),Field2:bigint,field3:struct<NESTED_Field1:decimal ( 38,9 ),NesteD_Field2:bigint>>" || "struct<Field1:decimal(38,9),Field2:bigint,field3:struct<NESTED_Field1:decimal(38,9),NesteD_Field2:bigint>>"
230221
}
231222

232223
@Unroll
@@ -242,17 +233,4 @@ class HiveTypeConverterSpec extends Specification {
242233
]
243234
}
244235

245-
@Unroll
246-
def 'case reserve fieldName Fidelity'(String typeString, String expectedString) {
247-
expect:
248-
def result = converter.fromMetacatTypeToJson(converter.toMetacatType(typeString)).toString()
249-
250-
assert result == expectedString
251-
252-
where:
253-
typeString | expectedString
254-
"struct<Field1:bigint,Field2:bigint,field3:struct<nested_Field1:bigint,nested_Field2:bigint>>" | """{"type":"row","fields":[{"name":"Field1","type":"bigint"},{"name":"Field2","type":"bigint"},{"name":"field3","type":{"type":"row","fields":[{"name":"nested_Field1","type":"bigint"},{"name":"nested_Field2","type":"bigint"}]}}]}"""
255-
"array<struct<date:string,countryCodes:array<string>,source:string>>" | """{"type":"array","elementType":{"type":"row","fields":[{"name":"date","type":"string"},{"name":"countryCodes","type":{"type":"array","elementType":"string"}},{"name":"source","type":"string"}]}}"""
256-
"array<struct<Date:string,nestedArray:array<struct<date:string,countryCodes:array<string>,source:string>>>>" | """{"type":"array","elementType":{"type":"row","fields":[{"name":"Date","type":"string"},{"name":"nestedArray","type":{"type":"array","elementType":{"type":"row","fields":[{"name":"date","type":"string"},{"name":"countryCodes","type":{"type":"array","elementType":"string"}},{"name":"source","type":"string"}]}}}]}}"""
257-
}
258236
}

metacat-functional-tests/metacat-test-cluster/etc-metacat/data/metadata/00000-0b60cc39-438f-413e-96c2-2694d7926529.metadata.json

Lines changed: 0 additions & 157 deletions
This file was deleted.

0 commit comments

Comments (0)