Skip to content

Commit

Permalink
[BUG] Fix all-null ImageArray length issues (#2034)
Browse files Browse the repository at this point in the history
Fixes a bug where the fast path for an all-null `ImageArray` created a
length-0 array instead of an all-null array of the expected length

Co-authored-by: Jay Chia <[email protected]@users.noreply.github.com>
  • Loading branch information
jaychia and Jay Chia authored Mar 23, 2024
1 parent eb315a8 commit afd6a75
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 6 deletions.
6 changes: 5 additions & 1 deletion src/daft-core/src/array/ops/full.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ where
<L::PhysicalType as DaftDataType>::ArrayType: FullNull,
{
/// Builds the `full_null` array of `length` elements named `name` for the logical `dtype`.
///
/// The backing array comes from the physical array type's own `full_null`;
/// it is then wrapped with a `Field` that carries the logical `dtype`.
fn full_null(name: &str, dtype: &DataType, length: usize) -> Self {
// NOTE(review): this page is a rendered diff ("5 additions & 1 deletion"), so the
// next statement appears to be the removed pre-fix line, which handed the logical
// `dtype` straight to the physical array — confirm against the repository.
let physical = <L::PhysicalType as DaftDataType>::ArrayType::full_null(name, dtype, length);
// Replacement form: the physical array is built from `dtype.to_physical()`.
let physical = <L::PhysicalType as DaftDataType>::ArrayType::full_null(
name,
&dtype.to_physical(),
length,
);
// The field keeps the logical dtype; only the backing array uses the physical one.
Self::new(Field::new(name, dtype.clone()), physical)
}

Expand Down
6 changes: 1 addition & 5 deletions src/daft-core/src/array/ops/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,11 +407,7 @@ impl ImageArray {
sidecar_data: ImageArraySidecarData,
) -> DaftResult<Self> {
if data.is_empty() {
// Create an all-null array if the data array is empty.
return Ok(ImageArray::new(
Field::new(name, data_type.clone()),
StructArray::empty(name, &data_type.to_physical()),
));
return Ok(ImageArray::full_null(name, &data_type, offsets.len() - 1));
}
let offsets = arrow2::offset::OffsetsBuffer::try_from(offsets)?;
let arrow_dtype: arrow2::datatypes::DataType = T::PRIMITIVE.into();
Expand Down
6 changes: 6 additions & 0 deletions src/daft-core/src/series/ops/downcast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ use crate::series::array_impl::ArrayWrapper;
use crate::series::Series;
use common_error::DaftResult;

use self::logical::ImageArray;

impl Series {
pub fn downcast<Arr: DaftArrayType>(&self) -> DaftResult<&Arr> {
match self.inner.as_any().downcast_ref() {
Expand Down Expand Up @@ -97,6 +99,10 @@ impl Series {
self.downcast()
}

/// Borrows this series as an [`ImageArray`].
///
/// Delegates to [`Series::downcast`]; the result is whatever that call
/// returns for `ImageArray`.
pub fn image(&self) -> DaftResult<&ImageArray> {
    self.downcast::<ImageArray>()
}

/// Borrows this series as a [`FixedShapeImageArray`].
///
/// Delegates to [`Series::downcast`]; the result is whatever that call
/// returns for `FixedShapeImageArray`.
pub fn fixed_size_image(&self) -> DaftResult<&FixedShapeImageArray> {
    self.downcast::<FixedShapeImageArray>()
}
Expand Down
14 changes: 14 additions & 0 deletions tests/table/image/test_decode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from __future__ import annotations

import daft


def test_decode_all_empty():
    """Decoding with ``on_error="null"`` gives one null per row when no row decodes."""
    source = {"foo": [b"not an image", None]}
    df = daft.from_pydict(source)

    # Neither row holds decodable image bytes: one is garbage, one is null.
    decoded = df["foo"].image.decode(on_error="null")
    df = df.with_column("image", decoded)
    df.collect()

    expected = {
        "foo": [b"not an image", None],
        "image": [None, None],
    }
    assert df.to_pydict() == expected

0 comments on commit afd6a75

Please sign in to comment.