Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CHORE] Implementations of FixedSizeListArray #1281

Merged
merged 14 commits into from
Aug 28, 2023
10 changes: 2 additions & 8 deletions src/daft-core/src/array/growable/arrow_growable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ use crate::{
},
datatypes::{
BinaryType, BooleanType, DaftArrowBackedType, DaftDataType, ExtensionArray, Field,
FixedSizeListType, Float32Type, Float64Type, Int128Type, Int16Type, Int32Type, Int64Type,
Int8Type, ListType, NullType, StructType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
Utf8Type,
Float32Type, Float64Type, Int128Type, Int16Type, Int32Type, Int64Type, Int8Type, ListType,
NullType, StructType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, Utf8Type,
},
DataType, IntoSeries, Series,
};
Expand Down Expand Up @@ -170,11 +169,6 @@ impl_arrow_backed_data_array_growable!(
ListType,
arrow2::array::growable::GrowableList<'a, i64>
);
impl_arrow_backed_data_array_growable!(
ArrowFixedSizeListGrowable,
FixedSizeListType,
arrow2::array::growable::GrowableFixedSizeList<'a>
);
impl_arrow_backed_data_array_growable!(
ArrowStructGrowable,
StructType,
Expand Down
10 changes: 6 additions & 4 deletions src/daft-core/src/array/growable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@ use crate::{
DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray,
FixedShapeTensorArray, ImageArray, TensorArray, TimestampArray,
},
BinaryArray, BooleanArray, ExtensionArray, FixedSizeListArray, Float32Array, Float64Array,
Int128Array, Int16Array, Int32Array, Int64Array, Int8Array, ListArray, NullArray,
StructArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, Utf8Array,
nested_arrays::FixedSizeListArray,
BinaryArray, BooleanArray, ExtensionArray, Float32Array, Float64Array, Int128Array,
Int16Array, Int32Array, Int64Array, Int8Array, ListArray, NullArray, StructArray,
UInt16Array, UInt32Array, UInt64Array, UInt8Array, Utf8Array,
},
DataType, Series,
};

mod arrow_growable;
mod logical_growable;
mod nested_growable;

#[cfg(feature = "python")]
mod python_growable;
Expand Down Expand Up @@ -120,7 +122,7 @@ impl_growable_array!(Utf8Array, arrow_growable::ArrowUtf8Growable<'a>);
impl_growable_array!(ListArray, arrow_growable::ArrowListGrowable<'a>);
impl_growable_array!(
FixedSizeListArray,
arrow_growable::ArrowFixedSizeListGrowable<'a>
nested_growable::FixedSizeListGrowable<'a>
);
impl_growable_array!(StructArray, arrow_growable::ArrowStructGrowable<'a>);
impl_growable_array!(ExtensionArray, arrow_growable::ArrowExtensionGrowable<'a>);
Expand Down
120 changes: 120 additions & 0 deletions src/daft-core/src/array/growable/nested_growable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
use std::mem::swap;

use common_error::DaftResult;

use crate::{
datatypes::{nested_arrays::FixedSizeListArray, Field},
with_match_daft_types, DataType, IntoSeries, Series,
};

use super::{Growable, GrowableArray};

/// Incrementally assembles a validity bitmap by copying bit ranges out of a fixed set of
/// optional source bitmaps.
pub struct ArrowBitmapGrowable<'a> {
    /// Source bitmaps, addressed by position in `extend`. A `None` entry is treated as
    /// "every bit set".
    bitmap_refs: Vec<Option<&'a arrow2::bitmap::Bitmap>>,
    /// Bits accumulated so far.
    mutable_bitmap: arrow2::bitmap::MutableBitmap,
}

impl<'a> ArrowBitmapGrowable<'a> {
    /// Creates a growable over `bitmap_refs`, pre-allocating room for `capacity` bits.
    pub fn new(bitmap_refs: Vec<Option<&'a arrow2::bitmap::Bitmap>>, capacity: usize) -> Self {
        Self {
            bitmap_refs,
            mutable_bitmap: arrow2::bitmap::MutableBitmap::with_capacity(capacity),
        }
    }

    /// Appends `len` bits taken from the source bitmap at position `index`, starting at bit
    /// `start` of that source. When the source is `None`, `len` set bits are appended instead.
    ///
    /// # Panics
    ///
    /// Panics if `index` does not refer to one of the sources passed to `new`.
    pub fn extend(&mut self, index: usize, start: usize, len: usize) {
        let source = self.bitmap_refs.get(index).unwrap_or_else(|| {
            panic!(
                "ArrowBitmapGrowable::extend: source index {} out of range for {} bitmaps",
                index,
                self.bitmap_refs.len()
            )
        });
        match source {
            None => self.mutable_bitmap.extend_constant(len, true),
            Some(bm) => {
                // `as_slice` also reports the bit offset into the returned bytes, so the
                // caller's `start` has to be shifted by that offset.
                let (bm_data, bm_start, _bm_len) = bm.as_slice();
                self.mutable_bitmap
                    .extend_from_slice(bm_data, bm_start + start, len)
            }
        }
    }

    /// Appends `additional` unset bits.
    fn add_nulls(&mut self, additional: usize) {
        self.mutable_bitmap.extend_constant(additional, false)
    }

    /// Consumes the growable and returns the accumulated bits as an immutable bitmap.
    fn build(self) -> arrow2::bitmap::Bitmap {
        // `self` is owned here, so the buffer can be moved out directly instead of cloned.
        self.mutable_bitmap.into()
    }
}

/// Growable that assembles a `FixedSizeListArray` from ranges copied out of source arrays.
///
/// Child values are delegated to a growable built over the sources' `flat_child` arrays,
/// while list-level validity is accumulated separately in an `ArrowBitmapGrowable`.
pub struct FixedSizeListGrowable<'a> {
    /// Name used for the field of the built array.
    name: String,
    /// Data type used for the field of the built array.
    dtype: DataType,
    /// Number of flat child values covered by one list entry.
    element_fixed_len: usize,
    /// Growable over the flat child values.
    child_growable: Box<dyn Growable + 'a>,
    /// List-level validity accumulated alongside the child values.
    growable_validity: ArrowBitmapGrowable<'a>,
}

impl<'a> FixedSizeListGrowable<'a> {
    /// Builds a growable over `arrays` for the given fixed-size-list `dtype`.
    ///
    /// `capacity` is expressed in list entries; the child growable is sized for
    /// `capacity * element_fixed_len` values. `use_validity` is forwarded to the child
    /// growable.
    ///
    /// # Panics
    ///
    /// Panics if `dtype` is not `DataType::FixedSizeList`, or if a source array's
    /// `flat_child` cannot be downcast to the array type matching the child dtype.
    pub fn new(
        name: String,
        dtype: &DataType,
        arrays: Vec<&'a FixedSizeListArray>,
        use_validity: bool,
        capacity: usize,
    ) -> Self {
        let DataType::FixedSizeList(child_field, element_fixed_len) = dtype else {
            panic!("Cannot create FixedSizeListGrowable from dtype: {}", dtype);
        };
        let list_width = *element_fixed_len;

        with_match_daft_types!(&child_field.dtype, |$T| {
            let flat_children = arrays
                .iter()
                .map(|list| {
                    list.flat_child
                        .downcast::<<$T as DaftDataType>::ArrayType>()
                        .unwrap()
                })
                .collect::<Vec<_>>();
            let source_validities = arrays
                .iter()
                .map(|list| list.validity.as_ref())
                .collect::<Vec<_>>();

            let child_growable = <<$T as DaftDataType>::ArrayType as GrowableArray>::make_growable(
                name.clone(),
                &child_field.dtype,
                flat_children,
                use_validity,
                capacity * list_width,
            );

            Self {
                name,
                dtype: dtype.clone(),
                element_fixed_len: list_width,
                child_growable: Box::new(child_growable),
                growable_validity: ArrowBitmapGrowable::new(source_validities, capacity),
            }
        })
    }
}

impl<'a> Growable for FixedSizeListGrowable<'a> {
    /// Appends `len` list entries, beginning at entry `start`, from the source at `index`.
    fn extend(&mut self, index: usize, start: usize, len: usize) {
        // One list entry spans `element_fixed_len` values of the flat child, so entry
        // positions are scaled before being handed to the child growable.
        let width = self.element_fixed_len;
        let (child_start, child_len) = (start * width, len * width);
        self.child_growable.extend(index, child_start, child_len);
        self.growable_validity.extend(index, start, len);
    }

    /// Appends `additional` null list entries, padding the child with the matching number
    /// of null values.
    fn add_nulls(&mut self, additional: usize) {
        let child_nulls = additional * self.element_fixed_len;
        self.child_growable.add_nulls(child_nulls);
        self.growable_validity.add_nulls(additional);
    }

    /// Finishes the child and validity and wraps them into a `FixedSizeListArray` series.
    fn build(&mut self) -> DaftResult<Series> {
        // `ArrowBitmapGrowable::build` takes ownership, but only `&mut self` is available
        // here, so the accumulated validity is exchanged for an empty placeholder first.
        let mut detached_validity = ArrowBitmapGrowable::new(Vec::new(), 0);
        swap(&mut detached_validity, &mut self.growable_validity);

        let flat_child = self.child_growable.build()?;
        let validity = detached_validity.build();
        let field = Field::new(self.name.clone(), self.dtype.clone());
        let list_array = FixedSizeListArray::new(field, flat_child, Some(validity));
        Ok(list_array.into_series())
    }
}
8 changes: 4 additions & 4 deletions src/daft-core/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ where
self.data().len()
}

/// Returns a reference to the data type stored on this array's field.
pub fn data_type(&self) -> &DataType {
&self.field.dtype
}

/// Returns `true` when `len()` is zero.
pub fn is_empty(&self) -> bool {
self.len() == 0
}
Expand Down Expand Up @@ -92,10 +96,6 @@ where
self.data.as_ref()
}

/// Returns a reference to the data type stored on this array's field.
pub fn data_type(&self) -> &DataType {
&self.field.dtype
}

/// Returns the name stored on this array's field as a string slice.
pub fn name(&self) -> &str {
self.field.name.as_str()
}
Expand Down
10 changes: 2 additions & 8 deletions src/daft-core/src/array/ops/as_arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@ use crate::{
array::DataArray,
datatypes::{
logical::{
DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray,
FixedShapeTensorArray, ImageArray, TensorArray, TimestampArray,
DateArray, Decimal128Array, DurationArray, ImageArray, TensorArray, TimestampArray,
},
BinaryArray, BooleanArray, DaftNumericType, FixedSizeListArray, ListArray, NullArray,
StructArray, Utf8Array,
BinaryArray, BooleanArray, DaftNumericType, ListArray, NullArray, StructArray, Utf8Array,
},
};

Expand Down Expand Up @@ -64,7 +62,6 @@ impl_asarrow_dataarray!(Utf8Array, array::Utf8Array<i64>);
impl_asarrow_dataarray!(BooleanArray, array::BooleanArray);
impl_asarrow_dataarray!(BinaryArray, array::BinaryArray<i64>);
impl_asarrow_dataarray!(ListArray, array::ListArray<i64>);
impl_asarrow_dataarray!(FixedSizeListArray, array::FixedSizeListArray);
impl_asarrow_dataarray!(StructArray, array::StructArray);

#[cfg(feature = "python")]
Expand All @@ -74,8 +71,5 @@ impl_asarrow_logicalarray!(Decimal128Array, array::PrimitiveArray<i128>);
impl_asarrow_logicalarray!(DateArray, array::PrimitiveArray<i32>);
impl_asarrow_logicalarray!(DurationArray, array::PrimitiveArray<i64>);
impl_asarrow_logicalarray!(TimestampArray, array::PrimitiveArray<i64>);
impl_asarrow_logicalarray!(EmbeddingArray, array::FixedSizeListArray);
impl_asarrow_logicalarray!(ImageArray, array::StructArray);
impl_asarrow_logicalarray!(FixedShapeImageArray, array::FixedSizeListArray);
impl_asarrow_logicalarray!(TensorArray, array::StructArray);
impl_asarrow_logicalarray!(FixedShapeTensorArray, array::FixedSizeListArray);
23 changes: 22 additions & 1 deletion src/daft-core/src/array/ops/broadcast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::{
growable::{Growable, GrowableArray},
DataArray,
},
datatypes::{DaftArrayType, DaftPhysicalType, DataType},
datatypes::{nested_arrays::FixedSizeListArray, DaftArrayType, DaftPhysicalType, DataType},
};

use common_error::{DaftError, DaftResult};
Expand Down Expand Up @@ -53,3 +53,24 @@ where
}
}
}

impl Broadcastable for FixedSizeListArray {
    /// Repeats the single entry of this array `num` times.
    ///
    /// A null entry yields an all-null array of length `num`; a valid entry is repeated
    /// through `generic_growable_broadcast`.
    ///
    /// # Errors
    ///
    /// Returns `DaftError::ValueError` when the array does not hold exactly one entry.
    fn broadcast(&self, num: usize) -> DaftResult<Self> {
        let is_unit_length = self.len() == 1;
        if !is_unit_length {
            let message = format!(
                "Attempting to broadcast non-unit length Array named: {}",
                self.name()
            );
            return Err(DaftError::ValueError(message));
        }

        // Nothing to copy for a null entry: emit `num` nulls directly.
        if !self.is_valid(0) {
            let all_null = FixedSizeListArray::full_null(self.name(), self.data_type(), num);
            return Ok(all_null);
        }

        generic_growable_broadcast(self, num, self.name(), self.data_type())
    }
}
Loading
Loading