Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CHORE]: Move image kernel out of daft-core #2804

Merged
merged 13 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ daft-core = {path = "src/daft-core", default-features = false}
daft-csv = {path = "src/daft-csv", default-features = false}
daft-dsl = {path = "src/daft-dsl", default-features = false}
daft-functions = {path = "src/daft-functions", default-features = false}
daft-image = {path = "src/daft-image", default-features = false}
daft-io = {path = "src/daft-io", default-features = false}
daft-json = {path = "src/daft-json", default-features = false}
daft-local-execution = {path = "src/daft-local-execution", default-features = false}
Expand Down Expand Up @@ -41,6 +42,7 @@ python = [
"daft-dsl/python",
"daft-local-execution/python",
"daft-io/python",
"daft-image/python",
"daft-json/python",
"daft-micropartition/python",
"daft-parquet/python",
Expand Down Expand Up @@ -114,6 +116,7 @@ members = [
"src/daft-core",
"src/daft-local-execution",
"src/daft-io",
"src/daft-image",
"src/daft-parquet",
"src/daft-csv",
"src/daft-json",
Expand Down
4 changes: 0 additions & 4 deletions daft/daft.pyi → daft/daft/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1385,10 +1385,6 @@ class PySeries:
def list_slice(self, start: PySeries, end: PySeries | None = None) -> PySeries: ...
def list_sort(self, desc: PySeries) -> PySeries: ...
def map_get(self, key: PySeries) -> PySeries: ...
def image_decode(self, raise_error_on_failure: bool, mode: ImageMode | None = None) -> PySeries: ...
def image_encode(self, image_format: ImageFormat) -> PySeries: ...
def image_resize(self, w: int, h: int) -> PySeries: ...
def image_to_mode(self, mode: ImageMode) -> PySeries: ...
def if_else(self, other: PySeries, predicate: PySeries) -> PySeries: ...
def is_null(self) -> PySeries: ...
def not_null(self) -> PySeries: ...
Expand Down
6 changes: 6 additions & 0 deletions daft/daft/image.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from daft.daft import ImageFormat, ImageMode, PySeries

# Type stubs for the image functions exposed by this module. Every function
# takes the PySeries to operate on as its first argument `s` and returns a
# PySeries.

# Decodes `s`, optionally targeting a specific ImageMode.
# NOTE(review): `raise_error_on_failure` presumably selects between raising and
# tolerating rows that fail to decode — confirm against the implementation.
def decode(s: PySeries, raise_error_on_failure: bool, mode: ImageMode | None = None) -> PySeries: ...
# Encodes `s` using the given ImageFormat.
def encode(s: PySeries, image_format: ImageFormat) -> PySeries: ...
# Resizes `s` using the integer arguments `w` and `h`
# (presumably target width and height — confirm).
def resize(s: PySeries, w: int, h: int) -> PySeries: ...
# Converts `s` to the given ImageMode.
def to_mode(s: PySeries, mode: ImageMode) -> PySeries: ...
universalmind303 marked this conversation as resolved.
Show resolved Hide resolved
10 changes: 5 additions & 5 deletions daft/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pyarrow as pa

from daft.arrow_utils import ensure_array, ensure_chunked_array
from daft.daft import CountMode, ImageFormat, ImageMode, PySeries
from daft.daft import CountMode, ImageFormat, ImageMode, PySeries, image
from daft.datatype import DataType
from daft.utils import pyarrow_supports_fixed_shape_tensor

Expand Down Expand Up @@ -994,26 +994,26 @@ def decode(
mode = ImageMode.from_mode_string(mode.upper())
if not isinstance(mode, ImageMode):
raise ValueError(f"mode must be a string or ImageMode variant, but got: {mode}")
return Series._from_pyseries(self._series.image_decode(raise_error_on_failure=raise_on_error, mode=mode))
return Series._from_pyseries(image.decode(self._series, raise_error_on_failure=raise_on_error, mode=mode))

def encode(self, image_format: str | ImageFormat) -> Series:
if isinstance(image_format, str):
image_format = ImageFormat.from_format_string(image_format.upper())
if not isinstance(image_format, ImageFormat):
raise ValueError(f"image_format must be a string or ImageFormat variant, but got: {image_format}")
return Series._from_pyseries(self._series.image_encode(image_format))
return Series._from_pyseries(image.encode(self._series, image_format))

def resize(self, w: int, h: int) -> Series:
if not isinstance(w, int):
raise TypeError(f"expected int for w but got {type(w)}")
if not isinstance(h, int):
raise TypeError(f"expected int for h but got {type(h)}")

return Series._from_pyseries(self._series.image_resize(w, h))
return Series._from_pyseries(image.resize(self._series, w, h))

def to_mode(self, mode: str | ImageMode) -> Series:
if isinstance(mode, str):
mode = ImageMode.from_mode_string(mode.upper())
if not isinstance(mode, ImageMode):
raise ValueError(f"mode must be a string or ImageMode variant, but got: {mode}")
return Series._from_pyseries(self._series.image_to_mode(mode))
return Series._from_pyseries(image.to_mode(self._series, mode))
12 changes: 8 additions & 4 deletions src/common/display/src/table_display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@ pub use comfy_table;

const BOLD_TABLE_HEADERS_IN_DISPLAY: &str = "DAFT_BOLD_TABLE_HEADERS";

/// A value container that can render the entry at a given index as a `String`.
///
/// `make_comfy_table` accepts its columns as `&dyn StrValue` trait objects.
pub trait StrValue {
/// Returns the string form of the entry at `idx`.
// NOTE(review): behavior for an out-of-range `idx` is up to the implementor — confirm.
fn str_value(&self, idx: usize) -> String;
}

/// A value container that can render the entry at a given index as a `String`
/// intended for HTML output.
// NOTE(review): presumably the returned text is HTML markup (escaping rules are
// the implementor's responsibility) — confirm against implementors.
pub trait HTMLValue {
/// Returns the HTML string form of the entry at `idx`.
fn html_value(&self, idx: usize) -> String;
}

// this should be factored out to a common crate
fn create_table_cell(value: &str) -> comfy_table::Cell {
let mut attributes = vec![];
Expand Down Expand Up @@ -45,10 +53,6 @@ pub fn make_schema_vertical_table<S1: ToString, S2: ToString>(
table
}

pub trait StrValue {
fn str_value(&self, idx: usize) -> String;
}

pub fn make_comfy_table<S: AsRef<str>>(
fields: &[S],
columns: Option<&[&dyn StrValue]>,
Expand Down
6 changes: 0 additions & 6 deletions src/daft-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ arrow2 = {workspace = true, features = [
"compute_substring",
"io_ipc"
]}
base64 = "0.22.0"
bincode = {workspace = true}
chrono = {workspace = true}
chrono-tz = {workspace = true}
Expand Down Expand Up @@ -51,11 +50,6 @@ serde_json = {workspace = true}
sketches-ddsketch = {workspace = true}
unicode-normalization = "0.1.23"

[dependencies.image]
default-features = false
features = ["gif", "jpeg", "ico", "png", "tiff", "webp", "bmp", "hdr"]
version = "0.24.7"

[dependencies.numpy]
optional = true
version = "0.19"
Expand Down
166 changes: 166 additions & 0 deletions src/daft-core/src/array/image_array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
use std::vec;

use common_error::DaftResult;

use crate::array::prelude::*;
use crate::datatypes::prelude::*;

use crate::series::{IntoSeries, Series};

/// A bounding box stored as four `u32` values.
///
/// NOTE(review): the meaning and order of the four components are not
/// established here — confirm against the code that consumes `BBox`.
#[derive(Clone)]
pub struct BBox(pub u32, pub u32, pub u32, pub u32);

impl BBox {
    /// Builds a `BBox` from an arrow array that holds exactly four `u32` values,
    /// taken in array order.
    ///
    /// # Panics
    ///
    /// Panics if `arr` does not have length 4, is not a `UInt32Array`, or
    /// contains a null entry.
    pub fn from_u32_arrow_array(arr: &dyn arrow2::array::Array) -> Self {
        assert!(arr.len() == 4);
        let coords = arr
            .as_any()
            .downcast_ref::<arrow2::array::UInt32Array>()
            .unwrap();
        // Each slot is an `Option<&u32>`; a null slot is a caller error.
        let mut values = coords.iter().map(|slot| *slot.unwrap());
        let first = values.next().unwrap();
        let second = values.next().unwrap();
        let third = values.next().unwrap();
        let fourth = values.next().unwrap();
        Self(first, second, third, fourth)
    }
}

/// Per-image metadata that accompanies the pixel data when building an `ImageArray`.
///
/// `ImageArray::from_list_array` turns each vector into a child column
/// (`"channel"`, `"height"`, `"width"`, `"mode"`) and applies `validity` to
/// every one of those columns as well as to the enclosing struct array.
pub struct ImageArraySidecarData {
/// Values for the `"channel"` child column.
pub channels: Vec<u16>,
/// Values for the `"height"` child column.
pub heights: Vec<u32>,
/// Values for the `"width"` child column.
pub widths: Vec<u32>,
/// Values for the `"mode"` child column.
// NOTE(review): presumably these are encoded `ImageMode` values — confirm.
pub modes: Vec<u8>,
/// Optional validity bitmap shared by all metadata columns and the struct itself.
pub validity: Option<arrow2::bitmap::Bitmap>,
}

impl ImageArray {
pub const IMAGE_DATA_IDX: usize = 0;
pub const IMAGE_CHANNEL_IDX: usize = 1;
pub const IMAGE_HEIGHT_IDX: usize = 2;
pub const IMAGE_WIDTH_IDX: usize = 3;
pub const IMAGE_MODE_IDX: usize = 4;

pub fn image_mode(&self) -> &Option<ImageMode> {
match self.data_type() {
DataType::Image(mode) => mode,
_ => panic!("Expected dtype to be Image"),
}
}

pub fn data_array(&self) -> &ListArray {
let array = self.physical.children.get(Self::IMAGE_DATA_IDX).unwrap();
array.list().unwrap()
}

pub fn channel_array(&self) -> &arrow2::array::UInt16Array {
let array = self.physical.children.get(Self::IMAGE_CHANNEL_IDX).unwrap();
array.u16().unwrap().as_arrow()
}

pub fn height_array(&self) -> &arrow2::array::UInt32Array {
let array = self.physical.children.get(Self::IMAGE_HEIGHT_IDX).unwrap();
array.u32().unwrap().as_arrow()
}

pub fn width_array(&self) -> &arrow2::array::UInt32Array {
let array = self.physical.children.get(Self::IMAGE_WIDTH_IDX).unwrap();
array.u32().unwrap().as_arrow()
}

pub fn mode_array(&self) -> &arrow2::array::UInt8Array {
let array = self.physical.children.get(Self::IMAGE_MODE_IDX).unwrap();
array.u8().unwrap().as_arrow()
}

pub fn from_list_array(
name: &str,
data_type: DataType,
data_array: ListArray,
sidecar_data: ImageArraySidecarData,
) -> DaftResult<Self> {
let values: Vec<Series> = vec![
data_array.into_series().rename("data"),
UInt16Array::from((
"channel",
Box::new(
arrow2::array::UInt16Array::from_vec(sidecar_data.channels)
.with_validity(sidecar_data.validity.clone()),
),
))
.into_series(),
UInt32Array::from((
"height",
Box::new(
arrow2::array::UInt32Array::from_vec(sidecar_data.heights)
.with_validity(sidecar_data.validity.clone()),
),
))
.into_series(),
UInt32Array::from((
"width",
Box::new(
arrow2::array::UInt32Array::from_vec(sidecar_data.widths)
.with_validity(sidecar_data.validity.clone()),
),
))
.into_series(),
UInt8Array::from((
"mode",
Box::new(
arrow2::array::UInt8Array::from_vec(sidecar_data.modes)
.with_validity(sidecar_data.validity.clone()),
),
))
.into_series(),
];
let physical_type = data_type.to_physical();
let struct_array = StructArray::new(
Field::new(name, physical_type),
values,
sidecar_data.validity,
);
Ok(ImageArray::new(Field::new(name, data_type), struct_array))
}

pub fn from_vecs<T: arrow2::types::NativeType>(
name: &str,
data_type: DataType,
data: Vec<T>,
offsets: Vec<i64>,
sidecar_data: ImageArraySidecarData,
) -> DaftResult<Self> {
if data.is_empty() {
return Ok(ImageArray::full_null(name, &data_type, offsets.len() - 1));
}
let offsets = arrow2::offset::OffsetsBuffer::try_from(offsets)?;
let arrow_dtype: arrow2::datatypes::DataType = T::PRIMITIVE.into();
if let DataType::Image(Some(mode)) = &data_type {
if mode.get_dtype().to_arrow()? != arrow_dtype {
panic!("Inner value dtype of provided dtype {data_type:?} is inconsistent with inferred value dtype {arrow_dtype:?}");
}
}
let data_array = ListArray::new(
Field::new("data", DataType::List(Box::new((&arrow_dtype).into()))),
Series::try_from((
"data",
Box::new(arrow2::array::PrimitiveArray::from_vec(data))
as Box<dyn arrow2::array::Array>,
))?,
offsets,
sidecar_data.validity.clone(),
);

Self::from_list_array(name, data_type, data_array, sidecar_data)
}
}

impl FixedShapeImageArray {
    /// Returns the image mode carried by this array's `FixedShapeImage` dtype.
    ///
    /// # Panics
    ///
    /// Panics if the array's dtype is not `DataType::FixedShapeImage`.
    pub fn image_mode(&self) -> &ImageMode {
        let dtype = self.data_type();
        if let DataType::FixedShapeImage(mode, _, _) = dtype {
            mode
        } else {
            panic!("Expected dtype to be Image, got {dtype:?}")
        }
    }
}
1 change: 1 addition & 0 deletions src/daft-core/src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod fixed_size_list_array;
pub mod from;
pub mod growable;
pub mod image_array;
pub mod iterator;
mod list_array;
pub mod ops;
Expand Down
3 changes: 2 additions & 1 deletion src/daft-core/src/array/ops/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use super::as_arrow::AsArrow;
use crate::{
array::{
growable::make_growable,
ops::{from_arrow::FromArrow, full::FullNull, image::ImageArraySidecarData},
image_array::ImageArraySidecarData,
ops::{from_arrow::FromArrow, full::FullNull},
DataArray, FixedSizeListArray, ListArray, StructArray,
},
datatypes::{
Expand Down
Loading
Loading