Skip to content

Commit

Permalink
initial commit (#1564)
Browse files Browse the repository at this point in the history
  • Loading branch information
sunchao authored Apr 15, 2022
1 parent 6083a91 commit 2bcc0cf
Show file tree
Hide file tree
Showing 4 changed files with 302 additions and 9 deletions.
38 changes: 36 additions & 2 deletions arrow-pyarrow-integration-testing/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,11 @@ def assert_pyarrow_leak():
pa.decimal128(19, 4),
pa.string(),
pa.binary(),
pa.binary(10),
pa.large_string(),
pa.large_binary(),
pa.list_(pa.int32()),
pa.list_(pa.int32(), 2),
pa.large_list(pa.uint16()),
pa.struct(
[
Expand All @@ -85,8 +87,6 @@ def assert_pyarrow_leak():
_unsupported_pyarrow_types = [
pa.decimal256(76, 38),
pa.duration("s"),
pa.binary(10),
pa.list_(pa.int32(), 2),
pa.map_(pa.string(), pa.int32()),
pa.union(
[pa.field("a", pa.binary(10)), pa.field("b", pa.string())],
Expand Down Expand Up @@ -190,6 +190,29 @@ def test_time32_python():
del b
del expected

def test_binary_array():
    """
    Round-trip a variable-length binary array: Python -> Rust -> Python.

    The array returned by Rust must pass full validation and match the
    source array in both values and type.
    """
    source = pa.array(["a", None, "bb", "ccc"], pa.binary())
    returned = rust.round_trip_array(source)
    returned.validate(full=True)
    assert source.to_pylist() == returned.to_pylist()
    assert source.type == returned.type
    # Drop both references explicitly before the test returns
    # (presumably so the surrounding leak check sees them released).
    del source
    del returned

def test_fixed_len_binary_array():
    """
    Round-trip a fixed-size binary array (3 bytes per element):
    Python -> Rust -> Python.

    The array returned by Rust must pass full validation and match the
    source array in both values and type.
    """
    source = pa.array(["aaa", None, "bbb", "ccc"], pa.binary(3))
    returned = rust.round_trip_array(source)
    returned.validate(full=True)
    assert source.to_pylist() == returned.to_pylist()
    assert source.type == returned.type
    # Drop both references explicitly before the test returns
    # (presumably so the surrounding leak check sees them released).
    del source
    del returned

def test_list_array():
"""
Expand All @@ -203,6 +226,17 @@ def test_list_array():
del a
del b

def test_fixed_len_list_array():
    """
    Round-trip a fixed-size list array (2 int64 values per list):
    Python -> Rust -> Python.

    The array returned by Rust must pass full validation and match the
    source array in both values and type.
    """
    source = pa.array([[1, 2], None, [3, 4], [5, 6]], pa.list_(pa.int64(), 2))
    returned = rust.round_trip_array(source)
    returned.validate(full=True)
    assert source.to_pylist() == returned.to_pylist()
    assert source.type == returned.type
    # Drop both references explicitly before the test returns
    # (presumably so the surrounding leak check sees them released).
    del source
    del returned

def test_timestamp_python():
"""
Expand Down
116 changes: 113 additions & 3 deletions arrow/src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,14 @@ impl TryFrom<ArrayData> for ffi::ArrowArray {

#[cfg(test)]
mod tests {
use crate::array::{DictionaryArray, Int32Array, StringArray};
use crate::array::{DictionaryArray, FixedSizeListArray, Int32Array, StringArray};
use crate::buffer::Buffer;
use crate::error::Result;
use crate::util::bit_util;
use crate::{
array::{
Array, ArrayData, BooleanArray, Int64Array, StructArray, UInt32Array,
UInt64Array,
Array, ArrayData, BooleanArray, FixedSizeBinaryArray, Int64Array,
StructArray, UInt32Array, UInt64Array,
},
datatypes::{DataType, Field},
ffi::ArrowArray,
Expand Down Expand Up @@ -149,4 +151,112 @@ mod tests {
let data = array.data();
test_round_trip(data)
}

#[test]
fn test_fixed_size_binary() -> Result<()> {
    // Three non-null elements, each exactly three bytes wide:
    // [10, 10, 10], [20, 20, 20], [30, 30, 30].
    let rows = [10u8, 20, 30].iter().map(|&byte| vec![byte; 3]);
    let fixed_binary = FixedSizeBinaryArray::try_from_iter(rows)?;

    test_round_trip(fixed_binary.data())
}

#[test]
fn test_fixed_size_binary_with_nulls() -> Result<()> {
    // Null slots sit at the start, in the middle and at the end, so the
    // round trip has to carry the validity information as well as the
    // three-byte values.
    let sparse_rows = vec![
        None,
        Some(vec![10, 10, 10]),
        None,
        Some(vec![20, 20, 20]),
        Some(vec![30, 30, 30]),
        None,
    ];
    let fixed_binary =
        FixedSizeBinaryArray::try_from_sparse_iter(sparse_rows.into_iter())?;

    test_round_trip(fixed_binary.data())
}

/// Round-trips a `FixedSizeList<Int64, 3>` array without nulls.
///
/// Nine child values are grouped into three lists of three elements each.
#[test]
fn test_fixed_size_list() -> Result<()> {
    // A `Range` is already an iterator, so it can be collected directly.
    let v: Vec<i64> = (0..9).collect();
    let value_data = ArrayData::builder(DataType::Int64)
        .len(9)
        .add_buffer(Buffer::from_slice_ref(&v))
        .build()?;
    let list_data_type =
        DataType::FixedSizeList(Box::new(Field::new("f", DataType::Int64, false)), 3);
    // 3 lists * 3 elements per list == 9 child values.
    let list_data = ArrayData::builder(list_data_type)
        .len(3)
        .add_child_data(value_data)
        .build()?;
    let array = FixedSizeListArray::from(list_data);

    let data = array.data();
    test_round_trip(data)
}

/// Round-trips a `FixedSizeList<Int16, 2>` array that carries a validity
/// bitmap.
///
/// Sixteen child values are grouped into eight lists of two elements each;
/// only the lists at indices 1, 2 and 6 are marked valid.
#[test]
fn test_fixed_size_list_with_nulls() -> Result<()> {
    // 0100 0110
    let mut validity_bits: [u8; 1] = [0; 1];
    bit_util::set_bit(&mut validity_bits, 1);
    bit_util::set_bit(&mut validity_bits, 2);
    bit_util::set_bit(&mut validity_bits, 6);

    // A `Range` is already an iterator, so it can be collected directly.
    let v: Vec<i16> = (0..16).collect();
    let value_data = ArrayData::builder(DataType::Int16)
        .len(16)
        .add_buffer(Buffer::from_slice_ref(&v))
        .build()?;
    let list_data_type =
        DataType::FixedSizeList(Box::new(Field::new("f", DataType::Int16, false)), 2);
    // 8 lists * 2 elements per list == 16 child values.
    let list_data = ArrayData::builder(list_data_type)
        .len(8)
        .null_bit_buffer(Buffer::from(validity_bits))
        .add_child_data(value_data)
        .build()?;
    let array = FixedSizeListArray::from(list_data);

    let data = array.data();
    test_round_trip(data)
}

/// Round-trips a `FixedSizeList<List<Int32>, 2>` array that carries a
/// validity bitmap.
///
/// Sixteen `Int32` values form eight inner lists of two values each, which
/// are in turn grouped into four fixed-size lists of two inner lists each.
/// Only the outer list at index 2 is marked valid.
#[test]
fn test_fixed_size_list_nested() -> Result<()> {
    // A `Range` is already an iterator, so it can be collected directly.
    let v: Vec<i32> = (0..16).collect();
    let value_data = ArrayData::builder(DataType::Int32)
        .len(16)
        .add_buffer(Buffer::from_slice_ref(&v))
        .build()?;

    // Nine offsets delimit eight inner lists, each covering two values.
    let offsets: Vec<i32> = vec![0, 2, 4, 6, 8, 10, 12, 14, 16];
    let value_offsets = Buffer::from_slice_ref(&offsets);
    let inner_list_data_type =
        DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
    let inner_list_data = ArrayData::builder(inner_list_data_type.clone())
        .len(8)
        .add_buffer(value_offsets)
        .add_child_data(value_data)
        .build()?;

    // 0000 0100
    let mut validity_bits: [u8; 1] = [0; 1];
    bit_util::set_bit(&mut validity_bits, 2);

    let list_data_type = DataType::FixedSizeList(
        Box::new(Field::new("f", inner_list_data_type, false)),
        2,
    );
    // 4 outer lists * 2 inner lists per outer list == 8 inner lists.
    let list_data = ArrayData::builder(list_data_type)
        .len(4)
        .null_bit_buffer(Buffer::from(validity_bits))
        .add_child_data(inner_list_data)
        .build()?;

    let array = FixedSizeListArray::from(list_data);

    let data = array.data();
    test_round_trip(data)
}
}
28 changes: 27 additions & 1 deletion arrow/src/datatypes/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,23 @@ impl TryFrom<&FFI_ArrowSchema> for DataType {
// Parametrized types, requiring string parse
other => {
match other.splitn(2, ':').collect::<Vec<&str>>().as_slice() {
// FixedSizeBinary type in format "w:num_bytes"
["w", num_bytes] => {
let parsed_num_bytes = num_bytes.parse::<i32>().map_err(|_| {
ArrowError::CDataInterface(
"FixedSizeBinary requires an integer parameter representing number of bytes per element".to_string())
})?;
DataType::FixedSizeBinary(parsed_num_bytes)
},
// FixedSizeList type in format "+w:num_elems"
["+w", num_elems] => {
let c_child = c_schema.child(0);
let parsed_num_elems = num_elems.parse::<i32>().map_err(|_| {
ArrowError::CDataInterface(
"The FixedSizeList type requires an integer parameter representing number of elements per list".to_string())
})?;
DataType::FixedSizeList(Box::new(Field::try_from(c_child)?), parsed_num_elems)
},
// Decimal types in format "d:precision,scale" or "d:precision,scale,bitWidth"
["d", extra] => {
match extra.splitn(3, ',').collect::<Vec<&str>>().as_slice() {
Expand Down Expand Up @@ -178,7 +195,9 @@ impl TryFrom<&DataType> for FFI_ArrowSchema {
let format = get_format_string(dtype)?;
// allocate and hold the children
let children = match dtype {
DataType::List(child) | DataType::LargeList(child) => {
DataType::List(child)
| DataType::LargeList(child)
| DataType::FixedSizeList(child, _) => {
vec![FFI_ArrowSchema::try_from(child.as_ref())?]
}
DataType::Struct(fields) => fields
Expand Down Expand Up @@ -215,6 +234,8 @@ fn get_format_string(dtype: &DataType) -> Result<String> {
DataType::LargeBinary => Ok("Z".to_string()),
DataType::Utf8 => Ok("u".to_string()),
DataType::LargeUtf8 => Ok("U".to_string()),
DataType::FixedSizeBinary(num_bytes) => Ok(format!("w:{}", num_bytes)),
DataType::FixedSizeList(_, num_elems) => Ok(format!("+w:{}", num_elems)),
DataType::Decimal(precision, scale) => Ok(format!("d:{},{}", precision, scale)),
DataType::Date32 => Ok("tdD".to_string()),
DataType::Date64 => Ok("tdm".to_string()),
Expand Down Expand Up @@ -325,6 +346,11 @@ mod tests {
round_trip_type(DataType::Float64)?;
round_trip_type(DataType::Date64)?;
round_trip_type(DataType::Time64(TimeUnit::Nanosecond))?;
round_trip_type(DataType::FixedSizeBinary(12))?;
round_trip_type(DataType::FixedSizeList(
Box::new(Field::new("a", DataType::Int64, false)),
5,
))?;
round_trip_type(DataType::Utf8)?;
round_trip_type(DataType::List(Box::new(Field::new(
"a",
Expand Down
Loading

0 comments on commit 2bcc0cf

Please sign in to comment.