From db4f1dac314365a172da038aa2cfd5e0cead2bd9 Mon Sep 17 00:00:00 2001 From: xiarixiaoyao Date: Wed, 19 Oct 2022 10:55:41 +0800 Subject: [PATCH] fixed the bug, string should convert to utf8 for avro record --- .../org/apache/hudi/hadoop/utils/HiveAvroSerializer.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java index 15781edb26aeb..ff17fd10fc7fa 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java @@ -25,6 +25,7 @@ import org.apache.avro.generic.GenericEnumSymbol; import org.apache.avro.generic.GenericRecord; import org.apache.avro.specific.SpecificRecordBase; +import org.apache.avro.util.Utf8; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; @@ -289,10 +290,10 @@ private Object serializePrimitive(PrimitiveObjectInspector fieldOI, Object struc return HoodieAvroUtils.DECIMAL_CONVERSION.toFixed(bd, schema, decimal); case CHAR: HiveChar ch = (HiveChar)fieldOI.getPrimitiveJavaObject(structFieldData); - return ch.getStrippedValue(); + return new Utf8(ch.getStrippedValue()); case VARCHAR: HiveVarchar vc = (HiveVarchar)fieldOI.getPrimitiveJavaObject(structFieldData); - return vc.getValue(); + return new Utf8(vc.getValue()); case DATE: return DateWritable.dateToDays(((DateObjectInspector)fieldOI).getPrimitiveJavaObject(structFieldData)); case TIMESTAMP: