31
31
import com .netflix .metacat .common .type .VarcharType ;
32
32
import lombok .extern .slf4j .Slf4j ;
33
33
import org .apache .hadoop .hive .serde .serdeConstants ;
34
+ import org .apache .hadoop .hive .serde2 .objectinspector .ListObjectInspector ;
35
+ import org .apache .hadoop .hive .serde2 .objectinspector .MapObjectInspector ;
36
+ import org .apache .hadoop .hive .serde2 .objectinspector .ObjectInspector ;
37
+ import org .apache .hadoop .hive .serde2 .objectinspector .PrimitiveObjectInspector ;
34
38
import org .apache .hadoop .hive .serde2 .objectinspector .PrimitiveObjectInspector .PrimitiveCategory ;
35
- import org .apache .hadoop .hive .serde2 .typeinfo .TypeInfo ;
36
- import org .apache .hadoop .hive .serde2 .typeinfo .ListTypeInfo ;
37
- import org .apache .hadoop .hive .serde2 .typeinfo .MapTypeInfo ;
38
- import org .apache .hadoop .hive .serde2 .typeinfo .StructTypeInfo ;
39
- import org .apache .hadoop .hive .serde2 .typeinfo .PrimitiveTypeInfo ;
40
- import org .apache .hadoop .hive .serde2 .typeinfo .DecimalTypeInfo ;
39
+ import org .apache .hadoop .hive .serde2 .objectinspector .StandardStructObjectInspector ;
40
+ import org .apache .hadoop .hive .serde2 .objectinspector .StructField ;
41
+ import org .apache .hadoop .hive .serde2 .objectinspector .StructObjectInspector ;
41
42
import org .apache .hadoop .hive .serde2 .typeinfo .CharTypeInfo ;
42
- import org .apache .hadoop .hive .serde2 .typeinfo .VarcharTypeInfo ;
43
+ import org .apache .hadoop .hive .serde2 .typeinfo .DecimalTypeInfo ;
44
+ import org .apache .hadoop .hive .serde2 .typeinfo .StructTypeInfo ;
45
+ import org .apache .hadoop .hive .serde2 .typeinfo .TypeInfo ;
43
46
import org .apache .hadoop .hive .serde2 .typeinfo .TypeInfoUtils ;
47
+ import org .apache .hadoop .hive .serde2 .typeinfo .VarcharTypeInfo ;
44
48
import org .apache .iceberg .PartitionField ;
45
49
import org .apache .iceberg .Schema ;
46
50
import org .apache .iceberg .types .Types ;
@@ -74,20 +78,24 @@ public class HiveTypeConverter implements ConnectorTypeConverter {
74
78
private static final Pattern DECIMAL_TYPE
75
79
= Pattern .compile (DECIMAL_WITH_SCALE + "|" + DECIMAL_WITH_SCALE_AND_PRECISION , Pattern .CASE_INSENSITIVE );
76
80
77
- private static Type getPrimitiveType (final TypeInfo typeInfo ) {
78
- final PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo ) typeInfo ).getPrimitiveCategory ();
81
+ private static Type getPrimitiveType (final ObjectInspector fieldInspector ) {
82
+ final PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector ) fieldInspector )
83
+ .getPrimitiveCategory ();
79
84
if (HiveTypeMapping .getHIVE_TO_CANONICAL ().containsKey (primitiveCategory .name ())) {
80
85
return HiveTypeMapping .getHIVE_TO_CANONICAL ().get (primitiveCategory .name ());
81
86
}
82
87
switch (primitiveCategory ) {
83
88
case DECIMAL :
84
- final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo ) typeInfo ;
89
+ final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo ) ((PrimitiveObjectInspector ) fieldInspector )
90
+ .getTypeInfo ();
85
91
return DecimalType .createDecimalType (decimalTypeInfo .precision (), decimalTypeInfo .getScale ());
86
92
case CHAR :
87
- final int cLength = ((CharTypeInfo ) typeInfo ).getLength ();
93
+ final int cLength = ((CharTypeInfo ) ((PrimitiveObjectInspector )
94
+ fieldInspector ).getTypeInfo ()).getLength ();
88
95
return CharType .createCharType (cLength );
89
96
case VARCHAR :
90
- final int vLength = ((VarcharTypeInfo ) typeInfo ).getLength ();
97
+ final int vLength = ((VarcharTypeInfo ) ((PrimitiveObjectInspector ) fieldInspector )
98
+ .getTypeInfo ()).getLength ();
91
99
return VarcharType .createVarcharType (vLength );
92
100
default :
93
101
return null ;
@@ -98,7 +106,17 @@ private static Type getPrimitiveType(final TypeInfo typeInfo) {
98
106
public Type toMetacatType (final String type ) {
99
107
// Hack to fix presto "varchar" type coming in with no length which is required by Hive.
100
108
final TypeInfo typeInfo = TypeInfoUtils .getTypeInfoFromTypeString (sanitizeType (type ));
101
- return getCanonicalType (typeInfo );
109
+ ObjectInspector oi = TypeInfoUtils .getStandardJavaObjectInspectorFromTypeInfo (typeInfo );
110
+ // The standard struct object inspector forces field names to lower case, however in Metacat we need to preserve
111
+ // the original case of the struct fields so we wrap it with our wrapper to force the fieldNames to keep
112
+ // their original case
113
+ if (typeInfo .getCategory ().equals (ObjectInspector .Category .STRUCT )) {
114
+ final StructTypeInfo structTypeInfo = (StructTypeInfo ) typeInfo ;
115
+ final StandardStructObjectInspector objectInspector = (StandardStructObjectInspector ) oi ;
116
+ oi = new HiveTypeConverter .SameCaseStandardStructObjectInspector (
117
+ structTypeInfo .getAllStructFieldNames (), objectInspector );
118
+ }
119
+ return getCanonicalType (oi );
102
120
}
103
121
104
122
/**
@@ -287,48 +305,43 @@ public static String sanitizeType(final String type) {
287
305
/**
288
306
* Returns the canonical type.
289
307
*
290
- * @param typeInfo typeInfo
291
- * @return Metacat Type
308
+ * @param fieldInspector inspector
309
+ * @return type
292
310
*/
293
- Type getCanonicalType (final TypeInfo typeInfo ) {
294
- switch (typeInfo .getCategory ()) {
311
+ Type getCanonicalType (final ObjectInspector fieldInspector ) {
312
+ switch (fieldInspector .getCategory ()) {
295
313
case PRIMITIVE :
296
- return getPrimitiveType (typeInfo );
314
+ return getPrimitiveType (fieldInspector );
297
315
case MAP :
298
- final MapTypeInfo mapTypeInfo =
299
- TypeUtils .checkType (typeInfo , MapTypeInfo .class , "typeInfo" );
300
- final Type keyType = getCanonicalType (mapTypeInfo .getMapKeyTypeInfo ());
301
- final Type valueType = getCanonicalType (mapTypeInfo .getMapValueTypeInfo ());
316
+ final MapObjectInspector mapObjectInspector =
317
+ TypeUtils .checkType (fieldInspector , MapObjectInspector .class ,
318
+ "fieldInspector" );
319
+ final Type keyType = getCanonicalType (mapObjectInspector .getMapKeyObjectInspector ());
320
+ final Type valueType = getCanonicalType (mapObjectInspector .getMapValueObjectInspector ());
302
321
if (keyType == null || valueType == null ) {
303
322
return null ;
304
323
}
305
324
return TypeRegistry .getTypeRegistry ().getParameterizedType (TypeEnum .MAP ,
306
325
ImmutableList .of (keyType .getTypeSignature (), valueType .getTypeSignature ()), ImmutableList .of ());
307
326
case LIST :
308
- final ListTypeInfo listTypeInfo =
309
- TypeUtils .checkType (typeInfo , ListTypeInfo .class , "typeInfo" );
327
+ final ListObjectInspector listObjectInspector =
328
+ TypeUtils .checkType (fieldInspector , ListObjectInspector .class ,
329
+ "fieldInspector" );
310
330
final Type elementType =
311
- getCanonicalType (listTypeInfo . getListElementTypeInfo ());
331
+ getCanonicalType (listObjectInspector . getListElementObjectInspector ());
312
332
if (elementType == null ) {
313
333
return null ;
314
334
}
315
335
return TypeRegistry .getTypeRegistry ().getParameterizedType (TypeEnum .ARRAY ,
316
336
ImmutableList .of (elementType .getTypeSignature ()), ImmutableList .of ());
317
337
case STRUCT :
318
- final StructTypeInfo structTypeInfo =
319
- TypeUtils .checkType (typeInfo , StructTypeInfo .class , "typeInfo" );
320
- // Hive struct type infos
321
- final List <String > structFieldNames = structTypeInfo .getAllStructFieldNames ();
322
- final List <TypeInfo > structFieldTypeInfos = structTypeInfo .getAllStructFieldTypeInfos ();
323
- final int structInfoCounts = structFieldNames .size ();
324
-
325
- // Metacat canonical type infos
326
- final List <TypeSignature > fieldTypes = new ArrayList <>(structInfoCounts );
327
- final List <Object > fieldNames = new ArrayList <>(structInfoCounts );
328
-
329
- for (int i = 0 ; i < structInfoCounts ; i ++) {
330
- fieldNames .add (structFieldNames .get (i ));
331
- final Type fieldType = getCanonicalType (structFieldTypeInfos .get (i ));
338
+ final StructObjectInspector structObjectInspector =
339
+ TypeUtils .checkType (fieldInspector , StructObjectInspector .class , "fieldInspector" );
340
+ final List <TypeSignature > fieldTypes = new ArrayList <>();
341
+ final List <Object > fieldNames = new ArrayList <>();
342
+ for (StructField field : structObjectInspector .getAllStructFieldRefs ()) {
343
+ fieldNames .add (field .getFieldName ());
344
+ final Type fieldType = getCanonicalType (field .getFieldObjectInspector ());
332
345
if (fieldType == null ) {
333
346
return null ;
334
347
}
@@ -337,8 +350,42 @@ Type getCanonicalType(final TypeInfo typeInfo) {
337
350
return TypeRegistry .getTypeRegistry ()
338
351
.getParameterizedType (TypeEnum .ROW , fieldTypes , fieldNames );
339
352
default :
340
- log .info ("Currently unsupported type {}, returning Unknown type" , typeInfo .getTypeName ());
353
+ log .info ("Currently unsupported type {}, returning Unknown type" , fieldInspector .getTypeName ());
341
354
return BaseType .UNKNOWN ;
342
355
}
343
356
}
357
+
358
+ // This is protected and extends StandardStructObjectInspector so it can reference MyField
359
+ protected static class SameCaseStandardStructObjectInspector extends StandardStructObjectInspector {
360
+ private final List <String > realFieldNames ;
361
+ private final StandardStructObjectInspector structObjectInspector ;
362
+
363
+ public SameCaseStandardStructObjectInspector (final List <String > realFieldNames ,
364
+ final StandardStructObjectInspector structObjectInspector ) {
365
+ this .realFieldNames = realFieldNames ;
366
+ this .structObjectInspector = structObjectInspector ;
367
+ }
368
+
369
+ @ Override
370
+ public List <? extends StructField > getAllStructFieldRefs () {
371
+ return structObjectInspector .getAllStructFieldRefs ()
372
+ .stream ()
373
+ .map (structField -> (MyField ) structField )
374
+ .map (field -> new HiveTypeConverter .
375
+ SameCaseStandardStructObjectInspector .SameCaseMyField (field .getFieldID (),
376
+ realFieldNames .get (field .getFieldID ()),
377
+ field .getFieldObjectInspector (), field .getFieldComment ()))
378
+ .collect (Collectors .toList ());
379
+ }
380
+
381
+ protected static class SameCaseMyField extends MyField {
382
+ public SameCaseMyField (final int fieldID , final String fieldName ,
383
+ final ObjectInspector fieldObjectInspector ,
384
+ final String fieldComment ) {
385
+ super (fieldID , fieldName , fieldObjectInspector , fieldComment );
386
+ // Since super lower cases fieldName, this is to restore the original case
387
+ this .fieldName = fieldName ;
388
+ }
389
+ }
390
+ }
344
391
}
0 commit comments