Skip to content

Commit

Permalink
Merge pull request #204 from xitongsys/dev
Browse files (browse the repository at this point in the history)
Dev
  • Loading branch information
xitongsys authored Jan 29, 2020
2 parents bd0ac08 + 818e1c3 commit 75349bf
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 31 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ There are two types in Parquet: Primitive Type and Logical Type. Logical types a
|BYTE_ARRAY|BYTE_ARRAY|string|
|FIXED_LEN_BYTE_ARRAY|FIXED_LEN_BYTE_ARRAY|string|
|UTF8|BYTE_ARRAY|string|
-|INT_8|INT32|int32|
-|INT_16|INT32|int32|
+|INT_8|INT32|int8|
+|INT_16|INT32|int16|
|INT_32|INT32|int32|
|INT_64|INT64|int64|
-|UINT_8|INT32|uint32|
-|UINT_16|INT32|uint32|
+|UINT_8|INT32|uint8|
+|UINT_16|INT32|uint16|
|UINT_32|INT32|uint32|
|UINT_64|INT64|uint64|
|DATE|INT32|int32|
Expand Down
6 changes: 3 additions & 3 deletions example/column_read.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ func main() {
log.Println("Can't create parquet writer")
return
}
num := 10
for i := 0; i < num; i++ {
num := int64(10)
for i := 0; int64(i) < num; i++ {
stu := Student{
Name: "StudentName",
Age: int32(20 + i%5),
Expand Down Expand Up @@ -70,7 +70,7 @@ func main() {
log.Println("Can't create column reader", err)
return
}
num = int(pr.GetNumRows())
num = int64(pr.GetNumRows())

pr.SkipRowsByPath("parquet_go_root.name", 5) //skip the first five rows
names, rls, dls, err = pr.ReadColumnByPath("parquet_go_root.name", num)
Expand Down
16 changes: 8 additions & 8 deletions example/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ type TypeList struct {
FixedLenByteArray string `parquet:"name=FixedLenByteArray, type=FIXED_LEN_BYTE_ARRAY, length=10"`

Utf8 string `parquet:"name=utf8, type=UTF8, encoding=PLAIN_DICTIONARY"`
Int_8 int32 `parquet:"name=int_8, type=INT_8"`
Int_16 int32 `parquet:"name=int_16, type=INT_16"`
Int_8 int8 `parquet:"name=int_8, type=INT_8"`
Int_16 int16 `parquet:"name=int_16, type=INT_16"`
Int_32 int32 `parquet:"name=int_32, type=INT_32"`
Int_64 int64 `parquet:"name=int_64, type=INT_64"`
Uint_8 uint32 `parquet:"name=uint_8, type=UINT_8"`
Uint_16 uint32 `parquet:"name=uint_16, type=UINT_16"`
Uint_8 uint8 `parquet:"name=uint_8, type=UINT_8"`
Uint_16 uint16 `parquet:"name=uint_16, type=UINT_16"`
Uint_32 uint32 `parquet:"name=uint_32, type=UINT_32"`
Uint_64 uint64 `parquet:"name=uint_64, type=UINT_64"`
Date int32 `parquet:"name=date, type=DATE"`
Expand Down Expand Up @@ -71,12 +71,12 @@ func main() {
FixedLenByteArray: "HelloWorld",

Utf8: "utf8",
Int_8: int32(i),
Int_16: int32(i),
Int_8: int8(i),
Int_16: int16(i),
Int_32: int32(i),
Int_64: int64(i),
Uint_8: uint32(i),
Uint_16: uint32(i),
Uint_8: uint8(i),
Uint_16: uint16(i),
Uint_32: uint32(i),
Uint_64: uint64(i),
Date: int32(i),
Expand Down
4 changes: 2 additions & 2 deletions schema/gettype.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ func (self *SchemaHandler) GetTypes() []reflect.Type {

if nc == 0 {
if *rT != parquet.FieldRepetitionType_REPEATED {
elementTypes[idx] = types.ParquetTypeToGoReflectType(pT, rT)
elementTypes[idx] = types.ParquetTypeToGoReflectType(pT, cT, rT)

} else {
elementTypes[idx] = reflect.SliceOf(types.ParquetTypeToGoReflectType(pT, nil))
elementTypes[idx] = reflect.SliceOf(types.ParquetTypeToGoReflectType(pT, cT, nil))
}

} else {
Expand Down
130 changes: 116 additions & 14 deletions types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,44 @@ func TypeNameToParquetType(name string, baseName string) (*parquet.Type, *parque
panic(fmt.Errorf("Unknown data type: '%s'", name))
}

func ParquetTypeToGoReflectType(pT *parquet.Type, rT *parquet.FieldRepetitionType) reflect.Type {
func ParquetTypeToGoReflectType(pT *parquet.Type, cT *parquet.ConvertedType, rT *parquet.FieldRepetitionType) reflect.Type {
if rT == nil || *rT != parquet.FieldRepetitionType_OPTIONAL {
if *pT == parquet.Type_BOOLEAN {
return reflect.TypeOf(true)

} else if *pT == parquet.Type_INT32 && cT == nil {
return reflect.TypeOf(int32(0))

} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_8 {
return reflect.TypeOf(int8(0))

}else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_16 {
return reflect.TypeOf(int16(0))

}else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_32 {
return reflect.TypeOf(int32(0))

}else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_8 {
return reflect.TypeOf(uint8(0))

}else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_16 {
return reflect.TypeOf(uint16(0))

}else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_32 {
return reflect.TypeOf(uint32(0))

} else if *pT == parquet.Type_INT32 {
return reflect.TypeOf(int32(0))

}else if *pT == parquet.Type_INT64 && cT == nil {
return reflect.TypeOf(int64(0))

}else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_INT_64 {
return reflect.TypeOf(int64(0))

}else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_UINT_64 {
return reflect.TypeOf(uint64(0))

} else if *pT == parquet.Type_INT64 {
return reflect.TypeOf(int64(0))

Expand Down Expand Up @@ -104,10 +134,50 @@ func ParquetTypeToGoReflectType(pT *parquet.Type, rT *parquet.FieldRepetitionTyp
v := true
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT32 && cT == nil{
v := int32(0)
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_8 {
v := int8(0)
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_16 {
v := int16(0)
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_INT_32 {
v := int32(0)
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_8 {
v := uint8(0)
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_16 {
v := uint16(0)
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT32 && *cT == parquet.ConvertedType_UINT_32 {
v := uint32(0)
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT32 {
v := int32(0)
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT64 && cT == nil {
v := int64(0)
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_INT_64 {
v := int64(0)
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT64 && *cT == parquet.ConvertedType_UINT_64 {
v := uint64(0)
return reflect.TypeOf(&v)

} else if *pT == parquet.Type_INT64 {
v := int64(0)
return reflect.TypeOf(&v)
Expand Down Expand Up @@ -146,10 +216,14 @@ func ParquetTypeToGoType(src interface{}, pT *parquet.Type, cT *parquet.Converte
return src
}

if *cT == parquet.ConvertedType_UINT_8 {
return uint32(src.(int32))
if *cT == parquet.ConvertedType_INT_8 {
return int8(src.(int32))
} else if *cT == parquet.ConvertedType_INT_16 {
return int16(src.(int32))
} else if *cT == parquet.ConvertedType_UINT_8 {
return uint8(src.(int32))
} else if *cT == parquet.ConvertedType_UINT_16 {
return uint32(src.(int32))
return uint16(src.(int32))
} else if *cT == parquet.ConvertedType_UINT_32 {
return uint32(src.(int32))
} else if *cT == parquet.ConvertedType_UINT_64 {
Expand All @@ -164,10 +238,14 @@ func GoTypeToParquetType(src interface{}, pT *parquet.Type, cT *parquet.Converte
return src
}

if *cT == parquet.ConvertedType_UINT_8 {
return int32(src.(uint32))
if *cT == parquet.ConvertedType_INT_8 {
return int32(src.(int8))
}else if *cT == parquet.ConvertedType_INT_16 {
return int32(src.(int16))
} else if *cT == parquet.ConvertedType_UINT_8 {
return int32(src.(uint8))
} else if *cT == parquet.ConvertedType_UINT_16 {
return int32(src.(uint32))
return int32(src.(uint16))
} else if *cT == parquet.ConvertedType_UINT_32 {
return int32(src.(uint32))
} else if *cT == parquet.ConvertedType_UINT_64 {
Expand Down Expand Up @@ -221,16 +299,40 @@ func StrToParquetType(s string, pT *parquet.Type, cT *parquet.ConvertedType, len
if *cT == parquet.ConvertedType_UTF8 {
return s

} else if *cT == parquet.ConvertedType_INT_8 || *cT == parquet.ConvertedType_INT_16 || *cT == parquet.ConvertedType_INT_32 ||
*cT == parquet.ConvertedType_DATE || *cT == parquet.ConvertedType_TIME_MILLIS {
} else if *cT == parquet.ConvertedType_INT_8 {
var v int8
fmt.Sscanf(s, "%d", &v)
return int32(v)

} else if *cT == parquet.ConvertedType_INT_16 {
var v int16
fmt.Sscanf(s, "%d", &v)
return int32(v)

} else if *cT == parquet.ConvertedType_INT_32 {
var v int32
fmt.Sscanf(s, "%d", &v)
return v
return int32(v)

} else if *cT == parquet.ConvertedType_UINT_8 || *cT == parquet.ConvertedType_UINT_16 || *cT == parquet.ConvertedType_UINT_32 {
var vt uint32
fmt.Sscanf(s, "%d", &vt)
return int32(vt)
} else if *cT == parquet.ConvertedType_UINT_8 {
var v uint8
fmt.Sscanf(s, "%d", &v)
return int32(v)

} else if *cT == parquet.ConvertedType_UINT_16 {
var v uint16
fmt.Sscanf(s, "%d", &v)
return int32(v)

} else if *cT == parquet.ConvertedType_UINT_32 {
var v uint32
fmt.Sscanf(s, "%d", &v)
return int32(v)

} else if *cT == parquet.ConvertedType_DATE || *cT == parquet.ConvertedType_TIME_MILLIS {
var v int32
fmt.Sscanf(s, "%d", &v)
return int32(v)

} else if *cT == parquet.ConvertedType_UINT_64 {
var vt uint64
Expand Down

0 comments on commit 75349bf

Please sign in to comment.