31
31
import com .netflix .metacat .common .type .VarcharType ;
32
32
import lombok .extern .slf4j .Slf4j ;
33
33
import org .apache .hadoop .hive .serde .serdeConstants ;
34
- import org .apache .hadoop .hive .serde2 .objectinspector .ListObjectInspector ;
35
- import org .apache .hadoop .hive .serde2 .objectinspector .MapObjectInspector ;
36
- import org .apache .hadoop .hive .serde2 .objectinspector .ObjectInspector ;
37
- import org .apache .hadoop .hive .serde2 .objectinspector .PrimitiveObjectInspector ;
38
34
import org .apache .hadoop .hive .serde2 .objectinspector .PrimitiveObjectInspector .PrimitiveCategory ;
39
- import org .apache .hadoop .hive .serde2 .objectinspector .StandardStructObjectInspector ;
40
- import org .apache .hadoop .hive .serde2 .objectinspector .StructField ;
41
- import org .apache .hadoop .hive .serde2 .objectinspector .StructObjectInspector ;
42
- import org .apache .hadoop .hive .serde2 .typeinfo .CharTypeInfo ;
43
- import org .apache .hadoop .hive .serde2 .typeinfo .DecimalTypeInfo ;
44
- import org .apache .hadoop .hive .serde2 .typeinfo .StructTypeInfo ;
45
35
import org .apache .hadoop .hive .serde2 .typeinfo .TypeInfo ;
46
- import org .apache .hadoop .hive .serde2 .typeinfo .TypeInfoUtils ;
36
+ import org .apache .hadoop .hive .serde2 .typeinfo .ListTypeInfo ;
37
+ import org .apache .hadoop .hive .serde2 .typeinfo .MapTypeInfo ;
38
+ import org .apache .hadoop .hive .serde2 .typeinfo .StructTypeInfo ;
39
+ import org .apache .hadoop .hive .serde2 .typeinfo .PrimitiveTypeInfo ;
40
+ import org .apache .hadoop .hive .serde2 .typeinfo .DecimalTypeInfo ;
41
+ import org .apache .hadoop .hive .serde2 .typeinfo .CharTypeInfo ;
47
42
import org .apache .hadoop .hive .serde2 .typeinfo .VarcharTypeInfo ;
43
+ import org .apache .hadoop .hive .serde2 .typeinfo .TypeInfoUtils ;
48
44
import org .apache .iceberg .PartitionField ;
49
45
import org .apache .iceberg .Schema ;
50
46
import org .apache .iceberg .types .Types ;
@@ -78,24 +74,20 @@ public class HiveTypeConverter implements ConnectorTypeConverter {
78
74
private static final Pattern DECIMAL_TYPE
79
75
= Pattern .compile (DECIMAL_WITH_SCALE + "|" + DECIMAL_WITH_SCALE_AND_PRECISION , Pattern .CASE_INSENSITIVE );
80
76
81
- private static Type getPrimitiveType (final ObjectInspector fieldInspector ) {
82
- final PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector ) fieldInspector )
83
- .getPrimitiveCategory ();
77
+ private static Type getPrimitiveType (final TypeInfo typeInfo ) {
78
+ final PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo ) typeInfo ).getPrimitiveCategory ();
84
79
if (HiveTypeMapping .getHIVE_TO_CANONICAL ().containsKey (primitiveCategory .name ())) {
85
80
return HiveTypeMapping .getHIVE_TO_CANONICAL ().get (primitiveCategory .name ());
86
81
}
87
82
switch (primitiveCategory ) {
88
83
case DECIMAL :
89
- final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo ) ((PrimitiveObjectInspector ) fieldInspector )
90
- .getTypeInfo ();
84
+ final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo ) typeInfo ;
91
85
return DecimalType .createDecimalType (decimalTypeInfo .precision (), decimalTypeInfo .getScale ());
92
86
case CHAR :
93
- final int cLength = ((CharTypeInfo ) ((PrimitiveObjectInspector )
94
- fieldInspector ).getTypeInfo ()).getLength ();
87
+ final int cLength = ((CharTypeInfo ) typeInfo ).getLength ();
95
88
return CharType .createCharType (cLength );
96
89
case VARCHAR :
97
- final int vLength = ((VarcharTypeInfo ) ((PrimitiveObjectInspector ) fieldInspector )
98
- .getTypeInfo ()).getLength ();
90
+ final int vLength = ((VarcharTypeInfo ) typeInfo ).getLength ();
99
91
return VarcharType .createVarcharType (vLength );
100
92
default :
101
93
return null ;
@@ -106,17 +98,7 @@ private static Type getPrimitiveType(final ObjectInspector fieldInspector) {
106
98
public Type toMetacatType (final String type ) {
107
99
// Hack to fix presto "varchar" type coming in with no length which is required by Hive.
108
100
final TypeInfo typeInfo = TypeInfoUtils .getTypeInfoFromTypeString (sanitizeType (type ));
109
- ObjectInspector oi = TypeInfoUtils .getStandardJavaObjectInspectorFromTypeInfo (typeInfo );
110
- // The standard struct object inspector forces field names to lower case, however in Metacat we need to preserve
111
- // the original case of the struct fields so we wrap it with our wrapper to force the fieldNames to keep
112
- // their original case
113
- if (typeInfo .getCategory ().equals (ObjectInspector .Category .STRUCT )) {
114
- final StructTypeInfo structTypeInfo = (StructTypeInfo ) typeInfo ;
115
- final StandardStructObjectInspector objectInspector = (StandardStructObjectInspector ) oi ;
116
- oi = new HiveTypeConverter .SameCaseStandardStructObjectInspector (
117
- structTypeInfo .getAllStructFieldNames (), objectInspector );
118
- }
119
- return getCanonicalType (oi );
101
+ return getCanonicalType (typeInfo );
120
102
}
121
103
122
104
/**
@@ -305,43 +287,48 @@ public static String sanitizeType(final String type) {
305
287
/**
306
288
* Returns the canonical type.
307
289
*
308
- * @param fieldInspector inspector
309
- * @return type
290
+ * @param typeInfo typeInfo
291
+ * @return Metacat Type
310
292
*/
311
- Type getCanonicalType (final ObjectInspector fieldInspector ) {
312
- switch (fieldInspector .getCategory ()) {
293
+ Type getCanonicalType (final TypeInfo typeInfo ) {
294
+ switch (typeInfo .getCategory ()) {
313
295
case PRIMITIVE :
314
- return getPrimitiveType (fieldInspector );
296
+ return getPrimitiveType (typeInfo );
315
297
case MAP :
316
- final MapObjectInspector mapObjectInspector =
317
- TypeUtils .checkType (fieldInspector , MapObjectInspector .class ,
318
- "fieldInspector" );
319
- final Type keyType = getCanonicalType (mapObjectInspector .getMapKeyObjectInspector ());
320
- final Type valueType = getCanonicalType (mapObjectInspector .getMapValueObjectInspector ());
298
+ final MapTypeInfo mapTypeInfo =
299
+ TypeUtils .checkType (typeInfo , MapTypeInfo .class , "typeInfo" );
300
+ final Type keyType = getCanonicalType (mapTypeInfo .getMapKeyTypeInfo ());
301
+ final Type valueType = getCanonicalType (mapTypeInfo .getMapValueTypeInfo ());
321
302
if (keyType == null || valueType == null ) {
322
303
return null ;
323
304
}
324
305
return TypeRegistry .getTypeRegistry ().getParameterizedType (TypeEnum .MAP ,
325
306
ImmutableList .of (keyType .getTypeSignature (), valueType .getTypeSignature ()), ImmutableList .of ());
326
307
case LIST :
327
- final ListObjectInspector listObjectInspector =
328
- TypeUtils .checkType (fieldInspector , ListObjectInspector .class ,
329
- "fieldInspector" );
308
+ final ListTypeInfo listTypeInfo =
309
+ TypeUtils .checkType (typeInfo , ListTypeInfo .class , "typeInfo" );
330
310
final Type elementType =
331
- getCanonicalType (listObjectInspector . getListElementObjectInspector ());
311
+ getCanonicalType (listTypeInfo . getListElementTypeInfo ());
332
312
if (elementType == null ) {
333
313
return null ;
334
314
}
335
315
return TypeRegistry .getTypeRegistry ().getParameterizedType (TypeEnum .ARRAY ,
336
316
ImmutableList .of (elementType .getTypeSignature ()), ImmutableList .of ());
337
317
case STRUCT :
338
- final StructObjectInspector structObjectInspector =
339
- TypeUtils .checkType (fieldInspector , StructObjectInspector .class , "fieldInspector" );
340
- final List <TypeSignature > fieldTypes = new ArrayList <>();
341
- final List <Object > fieldNames = new ArrayList <>();
342
- for (StructField field : structObjectInspector .getAllStructFieldRefs ()) {
343
- fieldNames .add (field .getFieldName ());
344
- final Type fieldType = getCanonicalType (field .getFieldObjectInspector ());
318
+ final StructTypeInfo structTypeInfo =
319
+ TypeUtils .checkType (typeInfo , StructTypeInfo .class , "typeInfo" );
320
+ // Hive struct type infos
321
+ final List <String > structFieldNames = structTypeInfo .getAllStructFieldNames ();
322
+ final List <TypeInfo > structFieldTypeInfos = structTypeInfo .getAllStructFieldTypeInfos ();
323
+ final int structInfoCounts = structFieldNames .size ();
324
+
325
+ // Metacat canonical type infos
326
+ final List <TypeSignature > fieldTypes = new ArrayList <>(structInfoCounts );
327
+ final List <Object > fieldNames = new ArrayList <>(structInfoCounts );
328
+
329
+ for (int i = 0 ; i < structInfoCounts ; i ++) {
330
+ fieldNames .add (structFieldNames .get (i ));
331
+ final Type fieldType = getCanonicalType (structFieldTypeInfos .get (i ));
345
332
if (fieldType == null ) {
346
333
return null ;
347
334
}
@@ -350,42 +337,8 @@ Type getCanonicalType(final ObjectInspector fieldInspector) {
350
337
return TypeRegistry .getTypeRegistry ()
351
338
.getParameterizedType (TypeEnum .ROW , fieldTypes , fieldNames );
352
339
default :
353
- log .info ("Currently unsupported type {}, returning Unknown type" , fieldInspector .getTypeName ());
340
+ log .info ("Currently unsupported type {}, returning Unknown type" , typeInfo .getTypeName ());
354
341
return BaseType .UNKNOWN ;
355
342
}
356
343
}
357
-
358
- // This is protected and extends StandardStructObjectInspector so it can reference MyField
359
- protected static class SameCaseStandardStructObjectInspector extends StandardStructObjectInspector {
360
- private final List <String > realFieldNames ;
361
- private final StandardStructObjectInspector structObjectInspector ;
362
-
363
- public SameCaseStandardStructObjectInspector (final List <String > realFieldNames ,
364
- final StandardStructObjectInspector structObjectInspector ) {
365
- this .realFieldNames = realFieldNames ;
366
- this .structObjectInspector = structObjectInspector ;
367
- }
368
-
369
- @ Override
370
- public List <? extends StructField > getAllStructFieldRefs () {
371
- return structObjectInspector .getAllStructFieldRefs ()
372
- .stream ()
373
- .map (structField -> (MyField ) structField )
374
- .map (field -> new HiveTypeConverter .
375
- SameCaseStandardStructObjectInspector .SameCaseMyField (field .getFieldID (),
376
- realFieldNames .get (field .getFieldID ()),
377
- field .getFieldObjectInspector (), field .getFieldComment ()))
378
- .collect (Collectors .toList ());
379
- }
380
-
381
- protected static class SameCaseMyField extends MyField {
382
- public SameCaseMyField (final int fieldID , final String fieldName ,
383
- final ObjectInspector fieldObjectInspector ,
384
- final String fieldComment ) {
385
- super (fieldID , fieldName , fieldObjectInspector , fieldComment );
386
- // Since super lower cases fieldName, this is to restore the original case
387
- this .fieldName = fieldName ;
388
- }
389
- }
390
- }
391
344
}
0 commit comments