diff --git a/Cargo.lock b/Cargo.lock index 8b6b2435a7..ceaccdbd77 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1290,6 +1290,14 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "common-arrow-ffi" +version = "0.3.0-dev0" +dependencies = [ + "arrow2", + "pyo3", +] + [[package]] name = "common-daft-config" version = "0.3.0-dev0" @@ -1304,6 +1312,7 @@ dependencies = [ name = "common-display" version = "0.3.0-dev0" dependencies = [ + "comfy-table 7.1.1", "indexmap 2.3.0", "pyo3", "terminal_size", @@ -1387,6 +1396,10 @@ dependencies = [ "common-error", ] +[[package]] +name = "common-version" +version = "0.3.0-dev0" + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1648,6 +1661,7 @@ dependencies = [ "common-resource-request", "common-system-info", "common-tracing", + "common-version", "daft-compression", "daft-core", "daft-csv", @@ -1695,12 +1709,13 @@ dependencies = [ "chrono", "chrono-tz", "comfy-table 7.1.1", - "common-daft-config", + "common-arrow-ffi", "common-display", "common-error", "common-hashable-float-wrapper", "common-py-serde", "daft-minhash", + "daft-schema", "daft-sketch", "fastrand 2.1.0", "fnv", @@ -1717,7 +1732,6 @@ dependencies = [ "log", "mur3", "ndarray", - "num-derive", "num-traits", "numpy", "pyo3", @@ -1957,6 +1971,7 @@ dependencies = [ "async-stream", "bincode", "bytes", + "common-arrow-ffi", "common-error", "crossbeam-channel", "daft-core", @@ -2009,6 +2024,7 @@ dependencies = [ "daft-functions", "daft-io", "daft-scan", + "daft-schema", "daft-table", "indexmap 2.3.0", "itertools 0.11.0", @@ -2036,6 +2052,7 @@ dependencies = [ "daft-io", "daft-json", "daft-parquet", + "daft-schema", "daft-stats", "daft-table", "futures", @@ -2066,6 +2083,25 @@ dependencies = [ "serde", ] +[[package]] +name = "daft-schema" +version = "0.3.0-dev0" +dependencies = [ + "arrow2", + "common-arrow-ffi", + "common-display", + "common-error", + "common-py-serde", + "common-version", + "html-escape", + "indexmap 2.3.0", + "num-derive", + "num-traits", + "pyo3", + "serde", + "serde_json", +] + [[package]] name = "daft-sketch" version = "0.3.0-dev0" @@ -2114,6 +2150,8 @@ version = "0.3.0-dev0" dependencies = [ "arrow2", "comfy-table 7.1.1", + "common-arrow-ffi", + "common-display", "common-error", "daft-core", "daft-dsl", diff --git a/Cargo.toml b/Cargo.toml index 4046665e77..f884972e00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ common-hashable-float-wrapper = {path = "src/common/hashable-float-wrapper", def common-resource-request = {path = "src/common/resource-request", default-features = false} common-system-info = {path = "src/common/system-info", default-features = false} common-tracing = {path = "src/common/tracing", default-features = false} +common-version = {path = "src/common/version", default-features = false} daft-compression = {path = "src/daft-compression", default-features = false} daft-core = {path = "src/daft-core", default-features = false} daft-csv = {path = "src/daft-csv", default-features = false} diff --git a/src/common/arrow-ffi/Cargo.toml b/src/common/arrow-ffi/Cargo.toml new file mode 100644 index 0000000000..b45af25939 --- /dev/null +++ b/src/common/arrow-ffi/Cargo.toml @@ -0,0 +1,11 @@ +[dependencies] +arrow2 = {workspace = true} +pyo3 = {workspace = true, optional = true} + +[features] +python = ["dep:pyo3"] + +[package] +edition = {workspace = true} +name = "common-arrow-ffi" +version = {workspace = true} diff --git a/src/daft-core/src/ffi.rs b/src/common/arrow-ffi/src/lib.rs similarity index 97% rename from src/daft-core/src/ffi.rs 
rename to src/common/arrow-ffi/src/lib.rs
index e7084c9a7a..24d57cfacd 100644
--- a/src/daft-core/src/ffi.rs
+++ b/src/common/arrow-ffi/src/lib.rs
@@ -2,11 +2,14 @@ use std::io::Cursor;
 
 use arrow2::{array::Array, datatypes::Field, ffi};
 
+#[cfg(feature = "python")]
 use pyo3::ffi::Py_uintptr_t;
+#[cfg(feature = "python")]
 use pyo3::prelude::*;
 
 pub type ArrayRef = Box<dyn Array>;
 
+#[cfg(feature = "python")]
 pub fn array_to_rust(arrow_array: &PyAny) -> PyResult<ArrayRef> {
     // prepare a pointer to receive the Array struct
     let array = Box::new(ffi::ArrowArray::empty());
@@ -28,7 +31,7 @@ pub fn array_to_rust(arrow_array: &PyAny) -> PyResult<ArrayRef> {
         Ok(array)
     }
 }
-
+#[cfg(feature = "python")]
 pub fn to_py_array(array: ArrayRef, py: Python, pyarrow: &PyModule) -> PyResult<PyObject> {
     let schema = Box::new(ffi::export_field_to_c(&Field::new(
         "",
@@ -53,6 +56,7 @@ pub fn to_py_array(array: ArrayRef, py: Python, pyarrow: &PyModule) -> PyResult<
     Ok(array.to_object(py))
 }
 
+#[cfg(feature = "python")]
 pub fn field_to_py(
     field: &arrow2::datatypes::Field,
     py: Python,
@@ -69,6 +73,7 @@ pub fn field_to_py(
     Ok(field.to_object(py))
 }
 
+#[cfg(feature = "python")]
 pub fn dtype_to_py(
     dtype: &arrow2::datatypes::DataType,
     py: Python,
diff --git a/src/common/daft-config/src/lib.rs b/src/common/daft-config/src/lib.rs
index c423d3aaaa..c25ff4ca3f 100644
--- a/src/common/daft-config/src/lib.rs
+++ b/src/common/daft-config/src/lib.rs
@@ -2,9 +2,6 @@ use common_io_config::IOConfig;
 
 use serde::{Deserialize, Serialize};
 
-/// Environment variables for Daft to use when formatting displays.
-pub const BOLD_TABLE_HEADERS_IN_DISPLAY: &str = "DAFT_BOLD_TABLE_HEADERS";
-
 /// Configurations for Daft to use during the building of a Dataframe's plan.
 ///
 /// 1. Creation of a Dataframe including any file listing and schema inference that needs to happen.
Note diff --git a/src/common/display/Cargo.toml b/src/common/display/Cargo.toml index 6c84c2827d..3fe4ea2774 100644 --- a/src/common/display/Cargo.toml +++ b/src/common/display/Cargo.toml @@ -1,4 +1,5 @@ [dependencies] +comfy-table = {workspace = true} indexmap = {workspace = true} pyo3 = {workspace = true, optional = true} terminal_size = {version = "0.3.0"} diff --git a/src/common/display/src/lib.rs b/src/common/display/src/lib.rs index 3d98362082..07af834bd4 100644 --- a/src/common/display/src/lib.rs +++ b/src/common/display/src/lib.rs @@ -1,5 +1,7 @@ +#![feature(let_chains)] pub mod ascii; pub mod mermaid; +pub mod table_display; pub mod tree; pub mod utils; diff --git a/src/daft-core/src/utils/display_table.rs b/src/common/display/src/table_display.rs similarity index 54% rename from src/daft-core/src/utils/display_table.rs rename to src/common/display/src/table_display.rs index 787e6fd52d..49c9bc5462 100644 --- a/src/daft-core/src/utils/display_table.rs +++ b/src/common/display/src/table_display.rs @@ -1,10 +1,8 @@ -use crate::{ - datatypes::{Field, TimeUnit}, - series::Series, -}; -use common_daft_config::BOLD_TABLE_HEADERS_IN_DISPLAY; -use itertools::Itertools; +pub use comfy_table; +const BOLD_TABLE_HEADERS_IN_DISPLAY: &str = "DAFT_BOLD_TABLE_HEADERS"; + +// this should be factored out to a common crate fn create_table_cell(value: &str) -> comfy_table::Cell { let mut attributes = vec![]; if std::env::var(BOLD_TABLE_HEADERS_IN_DISPLAY) @@ -22,110 +20,7 @@ fn create_table_cell(value: &str) -> comfy_table::Cell { cell } -pub fn display_date32(val: i32) -> String { - let epoch_date = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let date = if val.is_positive() { - epoch_date + chrono::naive::Days::new(val as u64) - } else { - epoch_date - chrono::naive::Days::new(val.unsigned_abs() as u64) - }; - format!("{date}") -} - -pub fn display_time64(val: i64, unit: &TimeUnit) -> String { - let time = match unit { - TimeUnit::Nanoseconds => Ok(chrono::NaiveTime::from_num_seconds_from_midnight_opt( - (val / 1_000_000_000) as u32, - (val % 1_000_000_000) as u32, - ) - .unwrap()), - TimeUnit::Microseconds => Ok(chrono::NaiveTime::from_num_seconds_from_midnight_opt( - (val / 1_000_000) as u32, - ((val % 1_000_000) * 1_000) as u32, - ) - .unwrap()), - TimeUnit::Milliseconds => { - let seconds = u32::try_from(val / 1_000); - let nanoseconds = u32::try_from((val % 1_000) * 1_000_000); - match (seconds, nanoseconds) { - (Ok(secs), Ok(nano)) => { - Ok(chrono::NaiveTime::from_num_seconds_from_midnight_opt(secs, nano).unwrap()) - } - (Err(e), _) => Err(e), - (_, Err(e)) => Err(e), - } - } - TimeUnit::Seconds => { - let seconds = u32::try_from(val); - match seconds { - Ok(secs) => { - Ok(chrono::NaiveTime::from_num_seconds_from_midnight_opt(secs, 0).unwrap()) - } - Err(e) => Err(e), - } - } - }; - - match time { - Ok(time) => format!("{time}"), - Err(e) => format!("Display Error: {e}"), - } -} - -pub fn display_timestamp(val: i64, unit: &TimeUnit, timezone: &Option) -> String { - use crate::array::ops::cast::{ - timestamp_to_str_naive, timestamp_to_str_offset, timestamp_to_str_tz, - }; - - timezone.as_ref().map_or_else( - || timestamp_to_str_naive(val, unit), - |timezone| { - // In arrow, timezone string can be either: - // 1. a fixed offset "-07:00", parsed using parse_offset, or - // 2. a timezone name e.g. "America/Los_Angeles", parsed using parse_offset_tz. 
- if let Ok(offset) = arrow2::temporal_conversions::parse_offset(timezone) { - timestamp_to_str_offset(val, unit, &offset) - } else if let Ok(tz) = arrow2::temporal_conversions::parse_offset_tz(timezone) { - timestamp_to_str_tz(val, unit, &tz) - } else { - panic!("Unable to parse timezone string {}", timezone) - } - }, - ) -} - -pub fn display_decimal128(val: i128, _precision: u8, scale: i8) -> String { - if scale < 0 { - unimplemented!(); - } else { - let modulus = i128::pow(10, scale as u32); - let integral = val / modulus; - if scale == 0 { - format!("{}", integral) - } else { - let sign = if val < 0 { "-" } else { "" }; - let integral = integral.abs(); - let decimals = (val % modulus).abs(); - let scale = scale as usize; - format!("{}{}.{:0scale$}", sign, integral, decimals) - } - } -} - -pub fn display_series_literal(series: &Series) -> String { - if !series.is_empty() { - format!( - "[{}]", - (0..series.len()) - .map(|i| series.str_value(i).unwrap()) - .join(", ") - ) - } else { - "[]".to_string() - } -} - -pub fn make_schema_vertical_table>(fields: &[F]) -> comfy_table::Table { +pub fn make_schema_vertical_table(names: &[S], dtypes: &[S]) -> comfy_table::Table { let mut table = comfy_table::Table::new(); let default_width_if_no_tty = 120usize; @@ -140,19 +35,21 @@ pub fn make_schema_vertical_table>(fields: &[F]) -> comfy_table: let header = vec![create_table_cell("Column Name"), create_table_cell("Type")]; table.set_header(header); - - for f in fields.iter() { - table.add_row(vec![ - f.as_ref().name.to_string(), - format!("{}", f.as_ref().dtype), - ]); + assert_eq!(names.len(), dtypes.len()); + for (name, dtype) in names.iter().zip(dtypes.iter()) { + table.add_row(vec![name.to_string(), dtype.to_string()]); } table } -pub fn make_comfy_table>( - fields: &[F], - columns: Option<&[&Series]>, +pub trait StrValue { + fn str_value(&self, idx: usize) -> String; +} + +pub fn make_comfy_table>( + fields: &[S], + columns: Option<&[&dyn StrValue]>, + num_rows: Option, max_col_width: Option, ) -> comfy_table::Table { let mut table = comfy_table::Table::new(); @@ -191,13 +88,7 @@ pub fn make_comfy_table>( let mut header = fields .iter() .take(head_cols) - .map(|field| { - create_table_cell(&format!( - "{}\n---\n{}", - field.as_ref().name, - field.as_ref().dtype - )) - }) + .map(|field| create_table_cell(field.as_ref())) .collect::>(); if tail_cols > 0 { let unseen_cols = num_columns - (head_cols + tail_cols); @@ -209,20 +100,19 @@ pub fn make_comfy_table>( )) .set_alignment(comfy_table::CellAlignment::Center), ); - header.extend(fields.iter().skip(num_columns - tail_cols).map(|field| { - create_table_cell(&format!( - "{}\n---\n{}", - field.as_ref().name, - field.as_ref().dtype - )) - })); + header.extend( + fields + .iter() + .skip(num_columns - tail_cols) + .map(|field| create_table_cell(field.as_ref())), + ); } if let Some(columns) = columns && !columns.is_empty() { table.set_header(header); - let len = columns.first().unwrap().len(); + let len = num_rows.expect("if columns are set, so should `num_rows`"); const TOTAL_ROWS: usize = 10; let head_rows; let tail_rows; @@ -239,7 +129,7 @@ pub fn make_comfy_table>( let all_cols = columns .iter() .map(|s| { - let mut str_val = s.str_value(i).unwrap(); + let mut str_val = s.str_value(i); if let Some(max_col_width) = max_col_width { if str_val.len() > max_col_width - DOTS.len() { str_val = format!( @@ -273,7 +163,7 @@ pub fn make_comfy_table>( let all_cols = columns .iter() .map(|s| { - let mut str_val = s.str_value(i).unwrap(); + let mut str_val = 
s.str_value(i); if let Some(max_col_width) = max_col_width { if str_val.len() > max_col_width - DOTS.len() { str_val = format!( diff --git a/src/common/version/Cargo.toml b/src/common/version/Cargo.toml new file mode 100644 index 0000000000..69f162f811 --- /dev/null +++ b/src/common/version/Cargo.toml @@ -0,0 +1,4 @@ +[package] +edition = {workspace = true} +name = "common-version" +version = {workspace = true} diff --git a/src/common/version/src/lib.rs b/src/common/version/src/lib.rs new file mode 100644 index 0000000000..56a2893a77 --- /dev/null +++ b/src/common/version/src/lib.rs @@ -0,0 +1,9 @@ +pub const VERSION: &str = env!("CARGO_PKG_VERSION"); +pub const BUILD_TYPE_DEV: &str = "dev"; +pub const DAFT_BUILD_TYPE: &str = { + let env_build_type: Option<&str> = option_env!("RUST_DAFT_PKG_BUILD_TYPE"); + match env_build_type { + Some(val) => val, + None => BUILD_TYPE_DEV, + } +}; diff --git a/src/daft-core/Cargo.toml b/src/daft-core/Cargo.toml index 3e448aea4d..683572157d 100644 --- a/src/daft-core/Cargo.toml +++ b/src/daft-core/Cargo.toml @@ -20,12 +20,13 @@ bincode = {workspace = true} chrono = {workspace = true} chrono-tz = {workspace = true} comfy-table = {workspace = true} -common-daft-config = {path = "../common/daft-config", default-features = false} +common-arrow-ffi = {path = "../common/arrow-ffi", default-features = false} common-display = {path = "../common/display", default-features = false} common-error = {path = "../common/error", default-features = false} common-hashable-float-wrapper = {path = "../common/hashable-float-wrapper"} common-py-serde = {path = "../common/py-serde", default-features = false} daft-minhash = {path = "../daft-minhash", default-features = false} +daft-schema = {path = "../daft-schema", default-features = false} daft-sketch = {path = "../daft-sketch", default-features = false} fastrand = "2.1.0" fnv = "1.0.7" @@ -41,7 +42,6 @@ lazy_static = {workspace = true} log = {workspace = true} mur3 = "0.1.0" ndarray = "0.15.6" -num-derive = {workspace = true} num-traits = {workspace = true} pyo3 = {workspace = true, optional = true} regex = {workspace = true} @@ -68,7 +68,9 @@ python = [ "dep:pyo3", "dep:numpy", "common-error/python", - "common-py-serde/python" + "common-py-serde/python", + "common-arrow-ffi/python", + "daft-schema/python" ] [package] diff --git a/src/daft-core/src/array/ops/cast.rs b/src/daft-core/src/array/ops/cast.rs index e2d2110456..582b65212d 100644 --- a/src/daft-core/src/array/ops/cast.rs +++ b/src/daft-core/src/array/ops/cast.rs @@ -17,7 +17,7 @@ use crate::{ Int32Array, Int64Array, TimeUnit, UInt64Array, Utf8Array, }, series::{IntoSeries, Series}, - utils::display_table::display_time64, + utils::display::display_time64, with_match_daft_logical_primitive_types, }; @@ -38,8 +38,8 @@ use indexmap::IndexMap; use { crate::array::pseudo_arrow::PseudoArrowArray, crate::datatypes::PythonArray, - crate::ffi, crate::with_match_numeric_daft_types, + common_arrow_ffi as ffi, ndarray::IntoDimension, num_traits::{NumCast, ToPrimitive}, numpy::{PyArray3, PyReadonlyArrayDyn}, @@ -519,7 +519,7 @@ fn append_values_from_numpy< values_vec: &mut Vec, shapes_vec: &mut Vec, ) -> DaftResult<(usize, usize)> { - use crate::python::PyDataType; + use daft_schema::python::PyDataType; use std::num::Wrapping; let np_dtype = pyarray.getattr(pyo3::intern!(pyarray.py(), "dtype"))?; diff --git a/src/daft-core/src/array/ops/image.rs b/src/daft-core/src/array/ops/image.rs index 66dcdaeba5..37b3c92a00 100644 --- a/src/daft-core/src/array/ops/image.rs +++ 
b/src/daft-core/src/array/ops/image.rs
@@ -118,6 +118,79 @@ impl Seek for CountingWriter {
     }
 }
 
+struct Wrap<T>(T);
+
+impl From<image::ImageFormat> for Wrap<ImageFormat> {
+    fn from(image_format: image::ImageFormat) -> Self {
+        Wrap(match image_format {
+            image::ImageFormat::Png => ImageFormat::PNG,
+            image::ImageFormat::Jpeg => ImageFormat::JPEG,
+            image::ImageFormat::Tiff => ImageFormat::TIFF,
+            image::ImageFormat::Gif => ImageFormat::GIF,
+            image::ImageFormat::Bmp => ImageFormat::BMP,
+            _ => unimplemented!("Image format {:?} is not supported", image_format),
+        })
+    }
+}
+
+impl From<Wrap<ImageFormat>> for image::ImageFormat {
+    fn from(image_format: Wrap<ImageFormat>) -> Self {
+        match image_format.0 {
+            ImageFormat::PNG => image::ImageFormat::Png,
+            ImageFormat::JPEG => image::ImageFormat::Jpeg,
+            ImageFormat::TIFF => image::ImageFormat::Tiff,
+            ImageFormat::GIF => image::ImageFormat::Gif,
+            ImageFormat::BMP => image::ImageFormat::Bmp,
+        }
+    }
+}
+
+impl From<Wrap<ImageMode>> for image::ColorType {
+    fn from(image_mode: Wrap<ImageMode>) -> image::ColorType {
+        use image::ColorType;
+        use ImageMode::*;
+
+        match image_mode.0 {
+            L => ColorType::L8,
+            LA => ColorType::La8,
+            RGB => ColorType::Rgb8,
+            RGBA => ColorType::Rgba8,
+            L16 => ColorType::L16,
+            LA16 => ColorType::La16,
+            RGB16 => ColorType::Rgb16,
+            RGBA16 => ColorType::Rgba16,
+            RGB32F => ColorType::Rgb32F,
+            RGBA32F => ColorType::Rgba32F,
+        }
+    }
+}
+
+impl TryFrom<image::ColorType> for Wrap<ImageMode> {
+    type Error = DaftError;
+
+    fn try_from(color: image::ColorType) -> DaftResult<Self> {
+        use image::ColorType;
+        use ImageMode::*;
+
+        Ok(Wrap(match color {
+            ColorType::L8 => Ok(L),
+            ColorType::La8 => Ok(LA),
+            ColorType::Rgb8 => Ok(RGB),
+            ColorType::Rgba8 => Ok(RGBA),
+            ColorType::L16 => Ok(L16),
+            ColorType::La16 => Ok(LA16),
+            ColorType::Rgb16 => Ok(RGB16),
+            ColorType::Rgba16 => Ok(RGBA16),
+            ColorType::Rgb32F => Ok(RGB32F),
+            ColorType::Rgba32F => Ok(RGBA32F),
+            _ => Err(DaftError::ValueError(format!(
+                "Color type {:?} is not supported.",
+                color
+            ))),
+        }?))
+    }
+}
+
 impl<'a> DaftImageBuffer<'a> {
     pub fn height(&self) -> u32 {
         with_method_on_image_buffer!(self, height)
@@ -139,7 +212,7 @@ impl<'a> DaftImageBuffer<'a> {
     }
 
     pub fn color(&self) -> ColorType {
-        self.mode().into()
+        Wrap(self.mode()).into()
     }
 
     pub fn mode(&self) -> ImageMode {
@@ -175,7 +248,7 @@ impl<'a> DaftImageBuffer<'a> {
             self.width(),
             self.height(),
             self.color(),
-            image::ImageFormat::from(image_format),
+            image::ImageFormat::from(Wrap(image_format)),
         )
         .map_err(|e| {
             DaftError::ValueError(format!(
diff --git a/src/daft-core/src/array/ops/repr.rs b/src/daft-core/src/array/ops/repr.rs
index f48dc7d000..aeedb30610 100644
--- a/src/daft-core/src/array/ops/repr.rs
+++ b/src/daft-core/src/array/ops/repr.rs
@@ -1,4 +1,5 @@
 use base64::Engine;
+use common_display::table_display::StrValue;
 
 use crate::{
     array::{DataArray, FixedSizeListArray, ListArray, StructArray},
@@ -12,7 +13,7 @@ use crate::{
         ImageFormat, NullArray, UInt64Array, Utf8Array,
     },
     series::Series,
-    utils::display_table::{display_date32, display_decimal128, display_time64, display_timestamp},
+    utils::display::{display_date32, display_decimal128, display_time64, display_timestamp},
     with_match_daft_types,
 };
 use common_error::DaftResult;
@@ -326,7 +327,7 @@ impl StructArray {
             .iter()
             .zip(self.children.iter())
             .filter(|(f, _)| !f.name.is_empty() && f.dtype != DataType::Null)
-            .map(|(f, s)| Ok(format!("{}: {},\n", f.name.as_str(), s.str_value(idx)?)))
+            .map(|(f, s)| Ok(format!("{}: {},\n", f.name.as_str(), s.str_value(idx))))
             .collect::<DaftResult<Vec<_>>>()?;
         let mut result = "{".to_string();
         for line in fields_to_strs {
diff --git
a/src/daft-core/src/array/ops/utf8.rs b/src/daft-core/src/array/ops/utf8.rs index ba77f078de..a753b9f208 100644 --- a/src/daft-core/src/array/ops/utf8.rs +++ b/src/daft-core/src/array/ops/utf8.rs @@ -937,7 +937,7 @@ impl Utf8Array { pub fn to_datetime(&self, format: &str, timezone: Option<&str>) -> DaftResult { let len = self.len(); let self_iter = self.as_arrow().iter(); - let timeunit = crate::datatypes::utils::infer_timeunit_from_format_string(format); + let timeunit = daft_schema::time_unit::infer_timeunit_from_format_string(format); let arrow_result = self_iter .map(|val| match val { diff --git a/src/daft-core/src/datatypes/binary_ops.rs b/src/daft-core/src/datatypes/infer_datatype.rs similarity index 86% rename from src/daft-core/src/datatypes/binary_ops.rs rename to src/daft-core/src/datatypes/infer_datatype.rs index bcbdc2d21a..730e8c6aeb 100644 --- a/src/daft-core/src/datatypes/binary_ops.rs +++ b/src/daft-core/src/datatypes/infer_datatype.rs @@ -1,16 +1,42 @@ -use std::ops::{Add, Div, Mul, Rem, Shl, Shr, Sub}; +use std::{ + fmt::Display, + ops::{Add, Div, Mul, Rem, Shl, Shr, Sub}, +}; use common_error::{DaftError, DaftResult}; -use crate::{impl_binary_trait_by_reference, utils::supertype::try_get_supertype}; +use crate::utils::supertype::try_get_supertype; use super::DataType; -impl DataType { +// This is a stopgap to keep this logic separated from the DataTypes themselves +// Once we convert daft-dsl to a root level crate, this logic should move there +pub struct InferDataType<'a>(&'a DataType); + +impl<'a> Display for InferDataType<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} +impl<'a> From<&'a DataType> for InferDataType<'a> { + fn from(value: &'a DataType) -> Self { + InferDataType(value) + } +} + +impl<'a> AsRef for InferDataType<'a> { + fn as_ref(&self) -> &DataType { + self.0 + } +} + +impl<'a> InferDataType<'a> { pub fn logical_op(&self, other: &Self) -> DaftResult { // Whether a logical op (and, or, xor) is supported between the two types. use DataType::*; - match (self, other) { + let left = self.0; + let other = other.0; + match (left, other) { #[cfg(feature = "python")] (Python, _) | (_, Python) => Ok(Boolean), (Boolean, Boolean) | (Boolean, Null) | (Null, Boolean) => Ok(Boolean), @@ -19,7 +45,7 @@ impl DataType { if dtype.is_floating() { Err(DaftError::TypeError(format!( "Cannot perform logic on types: {}, {}", - self, other + left, other ))) } else { Ok(dtype) @@ -29,7 +55,7 @@ impl DataType { (s, o) if (s.is_null() && o.is_integer()) => Ok(o.clone()), _ => Err(DaftError::TypeError(format!( "Cannot perform logic on types: {}, {}", - self, other + left, other ))), } } @@ -43,19 +69,22 @@ impl DataType { // - the output type, // - an optional intermediate type // - the type at which the comparison should be performed. 
+ + let left = &self.0; + let other = &other.0; let evaluator = || { use DataType::*; - match (self, other) { + match (left, other) { (s, o) if s == o => Ok((Boolean, None, s.to_physical())), (Utf8, o) | (o, Utf8) if o.is_numeric() => Err(DaftError::TypeError(format!( "Cannot perform comparison on Utf8 and numeric type.\ntypes: {}, {}", - self, other + left, other ))), (s, o) if s.is_physical() && o.is_physical() => { Ok((Boolean, None, try_physical_supertype(s, o)?)) } (Timestamp(..), Timestamp(..)) => { - let intermediate_type = try_get_supertype(self, other)?; + let intermediate_type = try_get_supertype(left, other)?; let pt = intermediate_type.to_physical(); Ok((Boolean, Some(intermediate_type), pt)) } @@ -66,7 +95,7 @@ impl DataType { } _ => Err(DaftError::TypeError(format!( "Cannot perform comparison on types: {}, {}", - self, other + left, other ))), } }; @@ -74,7 +103,7 @@ impl DataType { evaluator().map_err(|err| { DaftError::TypeError(format!( "Cannot perform comparison on types: {}, {}\nDetails:\n{err}", - self, other + left, other )) }) } @@ -87,13 +116,14 @@ impl DataType { } } -impl Add for &DataType { +impl<'a> Add for InferDataType<'a> { type Output = DaftResult; fn add(self, other: Self) -> Self::Output { use DataType::*; - try_numeric_supertype(self, other).or(try_fixed_shape_numeric_datatype(self, other, |l, r| {l + r})).or( - match (self, other) { + + try_numeric_supertype(self.0, other.0).or(try_fixed_shape_numeric_datatype(self.0, other.0, |l, r| {InferDataType::from(l) + InferDataType::from(r)})).or( + match (self.0, other.0) { #[cfg(feature = "python")] (Python, _) | (_, Python) => Ok(Python), (Timestamp(t_unit, tz), Duration(d_unit)) @@ -144,13 +174,13 @@ impl Add for &DataType { } } -impl Sub for &DataType { +impl<'a> Sub for InferDataType<'a> { type Output = DaftResult; fn sub(self, other: Self) -> Self::Output { use DataType::*; - try_numeric_supertype(self, other).or(try_fixed_shape_numeric_datatype(self, other, |l, r| {l - r})).or( - match (self, other) { + try_numeric_supertype(self.0, other.0).or(try_fixed_shape_numeric_datatype(self.0, other.0, |l, r| {InferDataType::from(l) - InferDataType::from(r)})).or( + match (self.0, other.0) { #[cfg(feature = "python")] (Python, _) | (_, Python) => Ok(Python), (Timestamp(t_unit, tz), Duration(d_unit)) @@ -179,12 +209,12 @@ impl Sub for &DataType { } } -impl Div for &DataType { +impl<'a> Div for InferDataType<'a> { type Output = DaftResult; fn div(self, other: Self) -> Self::Output { use DataType::*; - match (self, other) { + match (&self.0, &other.0) { #[cfg(feature = "python")] (Python, _) | (_, Python) => Ok(Python), (s, o) if s.is_numeric() && o.is_numeric() => Ok(Float64), @@ -193,18 +223,22 @@ impl Div for &DataType { self, other ))), } - .or(try_fixed_shape_numeric_datatype(self, other, |l, r| l / r)) + .or(try_fixed_shape_numeric_datatype(self.0, other.0, |l, r| { + InferDataType::from(l) / InferDataType::from(r) + })) } } -impl Mul for &DataType { +impl<'a> Mul for InferDataType<'a> { type Output = DaftResult; fn mul(self, other: Self) -> Self::Output { use DataType::*; - try_numeric_supertype(self, other) - .or(try_fixed_shape_numeric_datatype(self, other, |l, r| l * r)) - .or(match (self, other) { + try_numeric_supertype(self.0, other.0) + .or(try_fixed_shape_numeric_datatype(self.0, other.0, |l, r| { + InferDataType::from(l) * InferDataType::from(r) + })) + .or(match (self.0, other.0) { #[cfg(feature = "python")] (Python, _) | (_, Python) => Ok(Python), _ => Err(DaftError::TypeError(format!( @@ -215,14 
+249,16 @@ impl Mul for &DataType { } } -impl Rem for &DataType { +impl<'a> Rem for InferDataType<'a> { type Output = DaftResult; fn rem(self, other: Self) -> Self::Output { use DataType::*; - try_numeric_supertype(self, other) - .or(try_fixed_shape_numeric_datatype(self, other, |l, r| l % r)) - .or(match (self, other) { + try_numeric_supertype(self.0, other.0) + .or(try_fixed_shape_numeric_datatype(self.0, other.0, |l, r| { + InferDataType::from(l) % InferDataType::from(r) + })) + .or(match (self.0, other.0) { #[cfg(feature = "python")] (Python, _) | (_, Python) => Ok(Python), _ => Err(DaftError::TypeError(format!( @@ -233,11 +269,11 @@ impl Rem for &DataType { } } -impl Shl for &DataType { +impl<'a> Shl for InferDataType<'a> { type Output = DaftResult; fn shl(self, rhs: Self) -> Self::Output { - match (self, rhs) { + match (self.0, rhs.0) { (s, o) if s.is_integer() && o.is_integer() => Ok(s.clone()), _ => Err(DaftError::TypeError(format!( "Cannot operate shift left on types: {}, {}", @@ -247,11 +283,11 @@ impl Shl for &DataType { } } -impl Shr for &DataType { +impl<'a> Shr for InferDataType<'a> { type Output = DaftResult; fn shr(self, rhs: Self) -> Self::Output { - match (self, rhs) { + match (self.0, rhs.0) { (s, o) if s.is_integer() && o.is_integer() => Ok(s.clone()), _ => Err(DaftError::TypeError(format!( "Cannot operate shift right on types: {}, {}", @@ -261,12 +297,6 @@ impl Shr for &DataType { } } -impl_binary_trait_by_reference!(DataType, Add, add); -impl_binary_trait_by_reference!(DataType, Sub, sub); -impl_binary_trait_by_reference!(DataType, Mul, mul); -impl_binary_trait_by_reference!(DataType, Div, div); -impl_binary_trait_by_reference!(DataType, Rem, rem); - pub fn try_physical_supertype(l: &DataType, r: &DataType) -> DaftResult { // Given two physical data types, // get the physical data type that they can both be casted to. 
diff --git a/src/daft-core/src/datatypes/mod.rs b/src/daft-core/src/datatypes/mod.rs index f3e61806e8..a153bc4c0a 100644 --- a/src/daft-core/src/datatypes/mod.rs +++ b/src/daft-core/src/datatypes/mod.rs @@ -1,12 +1,8 @@ mod agg_ops; -mod binary_ops; -mod dtype; -mod field; -mod image_format; -mod image_mode; +mod infer_datatype; mod matching; -mod time_unit; +pub use infer_datatype::InferDataType; pub mod prelude; use crate::array::{ops::as_arrow::AsArrow, ListArray, StructArray}; pub use crate::array::{DataArray, FixedSizeListArray}; @@ -15,17 +11,19 @@ use arrow2::{ compute::comparison::Simd8, types::{simd::Simd, NativeType}, }; -pub use binary_ops::try_physical_supertype; -pub use dtype::DataType; -pub use field::Field; -pub use field::FieldID; -pub use field::FieldRef; -pub use image_format::ImageFormat; -pub use image_mode::ImageMode; +pub use infer_datatype::try_physical_supertype; use num_traits::{Bounded, Float, FromPrimitive, Num, NumCast, ToPrimitive, Zero}; use serde::Serialize; use std::ops::{Add, Div, Mul, Rem, Sub}; -pub use time_unit::TimeUnit; + +pub use daft_schema::field::{Field, FieldID, FieldRef}; + +pub use daft_schema::image_format::ImageFormat; +pub use daft_schema::image_mode::ImageMode; +pub use daft_schema::time_unit::{infer_timeunit_from_format_string, TimeUnit}; + +// Import DataType enum +pub use daft_schema::dtype::DataType; pub mod logical; @@ -364,7 +362,3 @@ impl DataArray { self.as_arrow().values().as_slice() } } - -pub mod utils { - pub use super::time_unit::infer_timeunit_from_format_string; -} diff --git a/src/daft-core/src/datatypes/prelude.rs b/src/daft-core/src/datatypes/prelude.rs index ed6cf0bb7e..64c8021e9b 100644 --- a/src/daft-core/src/datatypes/prelude.rs +++ b/src/daft-core/src/datatypes/prelude.rs @@ -6,10 +6,15 @@ pub use super::{ }; // Import utility types and structs -pub use super::{Field, FieldID, FieldRef, ImageFormat, ImageMode, TimeUnit}; + +pub use daft_schema::field::{Field, FieldID, FieldRef}; + +pub use daft_schema::image_format::ImageFormat; +pub use daft_schema::image_mode::ImageMode; +pub use daft_schema::time_unit::TimeUnit; // Import DataType enum -pub use super::DataType; +pub use daft_schema::dtype::DataType; // Conditionally import PythonArray if the 'python' feature is enabled #[cfg(feature = "python")] diff --git a/src/daft-core/src/lib.rs b/src/daft-core/src/lib.rs index 2ead97612f..50ad0063e4 100644 --- a/src/daft-core/src/lib.rs +++ b/src/daft-core/src/lib.rs @@ -6,13 +6,10 @@ pub mod array; pub mod count_mode; pub mod datatypes; -#[cfg(feature = "python")] -pub mod ffi; pub mod join; pub mod kernels; #[cfg(feature = "python")] pub mod python; -pub mod schema; pub mod series; pub mod utils; #[cfg(feature = "python")] @@ -20,16 +17,6 @@ use pyo3::prelude::*; pub mod prelude; -pub const VERSION: &str = env!("CARGO_PKG_VERSION"); -pub const BUILD_TYPE_DEV: &str = "dev"; -pub const DAFT_BUILD_TYPE: &str = { - let env_build_type: Option<&str> = option_env!("RUST_DAFT_PKG_BUILD_TYPE"); - match env_build_type { - Some(val) => val, - None => BUILD_TYPE_DEV, - } -}; - #[cfg(feature = "python")] pub fn register_modules(_py: Python, parent: &PyModule) -> PyResult<()> { parent.add_class::()?; diff --git a/src/daft-core/src/prelude.rs b/src/daft-core/src/prelude.rs index c4c59273e6..3bc5ed0f72 100644 --- a/src/daft-core/src/prelude.rs +++ b/src/daft-core/src/prelude.rs @@ -13,15 +13,11 @@ pub use crate::array::prelude::*; // Re-export count mode enum pub use crate::count_mode::CountMode; -pub use crate::schema::{Schema, 
SchemaRef}; +pub use daft_schema::schema::{Schema, SchemaRef}; // Re-export join-related types pub use crate::join::{JoinStrategy, JoinType}; -// Re-export version information -pub use crate::DAFT_BUILD_TYPE; -pub use crate::VERSION; - // You might want to include a glob import for users who want everything pub mod all { pub use super::*; diff --git a/src/daft-core/src/python/mod.rs b/src/daft-core/src/python/mod.rs index e69fce44bf..5b4c4f57f7 100644 --- a/src/daft-core/src/python/mod.rs +++ b/src/daft-core/src/python/mod.rs @@ -1,22 +1,12 @@ use pyo3::prelude::*; -pub mod datatype; -pub mod field; -pub mod schema; pub mod series; -use crate::datatypes::ImageFormat; -use crate::datatypes::ImageMode; -pub use datatype::PyDataType; pub use series::PySeries; -pub fn register_modules(_py: Python, parent: &PyModule) -> PyResult<()> { - parent.add_class::()?; - parent.add_class::()?; - parent.add_class::()?; - parent.add_class::()?; - parent.add_class::()?; - parent.add_class::()?; - parent.add_class::()?; +pub use daft_schema::python::{field::PyField, schema::PySchema, PyDataType, PyTimeUnit}; +pub fn register_modules(py: Python, parent: &PyModule) -> PyResult<()> { + parent.add_class::()?; + daft_schema::python::register_modules(py, parent)?; Ok(()) } diff --git a/src/daft-core/src/python/series.rs b/src/daft-core/src/python/series.rs index 59a6682d56..7cea1fc248 100644 --- a/src/daft-core/src/python/series.rs +++ b/src/daft-core/src/python/series.rs @@ -15,14 +15,15 @@ use crate::{ }, count_mode::CountMode, datatypes::{DataType, Field, ImageFormat, ImageMode, PythonType}, - ffi, series::{self, IntoSeries, Series}, utils::arrow::{cast_array_for_daft_if_needed, cast_array_from_daft_if_needed}, }; -use super::datatype::PyDataType; +use common_arrow_ffi as ffi; + use crate::array::ops::as_arrow::AsArrow; use crate::array::ops::trigonometry::TrigonometricFunction; +use daft_schema::python::PyDataType; #[pyclass] #[derive(Clone)] diff --git a/src/daft-core/src/series/array_impl/binary_ops.rs b/src/daft-core/src/series/array_impl/binary_ops.rs index 0537dc36f0..2e3821df3a 100644 --- a/src/daft-core/src/series/array_impl/binary_ops.rs +++ b/src/daft-core/src/series/array_impl/binary_ops.rs @@ -2,6 +2,15 @@ use std::ops::{Add, Div, Mul, Rem, Sub}; use common_error::DaftResult; +use crate::datatypes::logical::{ + DateArray, DurationArray, EmbeddingArray, FixedShapeImageArray, FixedShapeTensorArray, + ImageArray, TensorArray, TimeArray, TimestampArray, +}; +use crate::datatypes::InferDataType; +use crate::datatypes::{ + BinaryArray, BooleanArray, ExtensionArray, Float32Array, Float64Array, Int16Array, Int32Array, + Int64Array, Int8Array, NullArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, Utf8Array, +}; use crate::{ array::{ ops::{DaftCompare, DaftLogical}, @@ -16,15 +25,6 @@ use crate::{ with_match_comparable_daft_types, with_match_integer_daft_types, with_match_numeric_daft_types, }; -use crate::datatypes::logical::{ - DateArray, DurationArray, EmbeddingArray, FixedShapeImageArray, FixedShapeTensorArray, - ImageArray, TensorArray, TimeArray, TimestampArray, -}; -use crate::datatypes::{ - BinaryArray, BooleanArray, ExtensionArray, Float32Array, Float64Array, Int16Array, Int32Array, - Int64Array, Int8Array, NullArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, Utf8Array, -}; - use super::{ArrayWrapper, IntoSeries, Series}; #[cfg(feature = "python")] @@ -122,7 +122,8 @@ macro_rules! binary_op_unimplemented { macro_rules! 
py_numeric_binary_op { ($self:expr, $rhs:expr, $op:ident, $pyop:expr) => {{ - let output_type = ($self.data_type().$op($rhs.data_type()))?; + let output_type = + InferDataType::from($self.data_type()).$op(InferDataType::from($rhs.data_type()))?; let lhs = $self.into_series(); use DataType::*; match &output_type { @@ -149,7 +150,8 @@ macro_rules! py_numeric_binary_op { macro_rules! physical_logic_op { ($self:expr, $rhs:expr, $op:ident, $pyop:expr) => {{ - let output_type = ($self.data_type().logical_op($rhs.data_type()))?; + let output_type = InferDataType::from($self.data_type()) + .logical_op(&InferDataType::from($rhs.data_type()))?; let lhs = $self.into_series(); use DataType::*; match &output_type { @@ -177,8 +179,8 @@ macro_rules! physical_logic_op { macro_rules! physical_compare_op { ($self:expr, $rhs:expr, $op:ident, $pyop:expr) => {{ - let (output_type, intermediate, comp_type) = - ($self.data_type().comparison_op($rhs.data_type()))?; + let (output_type, intermediate, comp_type) = InferDataType::from($self.data_type()) + .comparison_op(&InferDataType::from($rhs.data_type()))?; let lhs = $self.into_series(); let (lhs, rhs) = if let Some(ref it) = intermediate { (lhs.cast(it)?, $rhs.cast(it)?) @@ -205,7 +207,8 @@ macro_rules! physical_compare_op { pub(crate) trait SeriesBinaryOps: SeriesLike { fn add(&self, rhs: &Series) -> DaftResult { - let output_type = (self.data_type().add(rhs.data_type()))?; + let output_type = + InferDataType::from(self.data_type()).add(InferDataType::from(rhs.data_type()))?; let lhs = self.into_series(); use DataType::*; match &output_type { @@ -230,7 +233,8 @@ pub(crate) trait SeriesBinaryOps: SeriesLike { py_numeric_binary_op!(self, rhs, mul, "mul") } fn div(&self, rhs: &Series) -> DaftResult { - let output_type = (self.data_type().div(rhs.data_type()))?; + let output_type = + InferDataType::from(self.data_type()).div(InferDataType::from(rhs.data_type()))?; let lhs = self.into_series(); use DataType::*; match &output_type { @@ -302,7 +306,8 @@ impl SeriesBinaryOps for ArrayWrapper {} impl SeriesBinaryOps for ArrayWrapper { fn add(&self, rhs: &Series) -> DaftResult { use DataType::*; - let output_type = (self.data_type() + rhs.data_type())?; + let output_type = + (InferDataType::from(self.data_type()) + InferDataType::from(rhs.data_type()))?; match rhs.data_type() { Duration(..) => { let days = rhs.duration()?.cast_to_days()?; @@ -314,7 +319,8 @@ impl SeriesBinaryOps for ArrayWrapper { } fn sub(&self, rhs: &Series) -> DaftResult { use DataType::*; - let output_type = (self.data_type() - rhs.data_type())?; + let output_type = + (InferDataType::from(self.data_type()) - InferDataType::from(rhs.data_type()))?; match rhs.data_type() { Date => { let physical_result = self.0.physical.sub(&rhs.date()?.physical)?; @@ -333,7 +339,8 @@ impl SeriesBinaryOps for ArrayWrapper {} impl SeriesBinaryOps for ArrayWrapper { fn add(&self, rhs: &Series) -> DaftResult { use DataType::*; - let output_type = (self.data_type() + rhs.data_type())?; + let output_type = + (InferDataType::from(self.data_type()) + InferDataType::from(rhs.data_type()))?; let lhs = self.0.clone().into_series(); match rhs.data_type() { Timestamp(..) => { @@ -355,7 +362,8 @@ impl SeriesBinaryOps for ArrayWrapper { fn sub(&self, rhs: &Series) -> DaftResult { use DataType::*; - let output_type = (self.data_type() - rhs.data_type())?; + let output_type = + (InferDataType::from(self.data_type()) - InferDataType::from(rhs.data_type()))?; match rhs.data_type() { Duration(..) 
=> { let physical_result = self.0.physical.sub(&rhs.duration()?.physical)?; @@ -369,7 +377,8 @@ impl SeriesBinaryOps for ArrayWrapper { impl SeriesBinaryOps for ArrayWrapper { fn add(&self, rhs: &Series) -> DaftResult { use DataType::*; - let output_type = (self.data_type() + rhs.data_type())?; + let output_type = + (InferDataType::from(self.data_type()) + InferDataType::from(rhs.data_type()))?; match rhs.data_type() { Duration(..) => { let physical_result = self.0.physical.add(&rhs.duration()?.physical)?; @@ -380,7 +389,8 @@ impl SeriesBinaryOps for ArrayWrapper { } fn sub(&self, rhs: &Series) -> DaftResult { use DataType::*; - let output_type = (self.data_type() - rhs.data_type())?; + let output_type = + (InferDataType::from(self.data_type()) - InferDataType::from(rhs.data_type()))?; match rhs.data_type() { Duration(..) => { let physical_result = self.0.physical.sub(&rhs.duration()?.physical)?; diff --git a/src/daft-core/src/series/mod.rs b/src/daft-core/src/series/mod.rs index 22d3f2b67f..1501a3dd04 100644 --- a/src/daft-core/src/series/mod.rs +++ b/src/daft-core/src/series/mod.rs @@ -4,7 +4,6 @@ mod ops; mod serdes; mod series_like; use std::{ - borrow::Cow, fmt::{Display, Formatter, Result}, sync::Arc, }; @@ -15,9 +14,9 @@ use crate::{ DataArray, }, datatypes::{DaftDataType, DaftNumericType, DataType, Field, FieldRef, NumericNative}, - utils::display_table::make_comfy_table, with_match_daft_types, }; +use common_display::table_display::{make_comfy_table, StrValue}; use common_error::DaftResult; pub use array_impl::IntoSeries; @@ -100,9 +99,13 @@ impl Series { } pub fn to_comfy_table(&self) -> comfy_table::Table { + let field = self.field(); + let field_disp = format!("{}\n---\n{}", field.name, field.dtype); + make_comfy_table( - vec![Cow::Borrowed(self.field())].as_slice(), - Some([self].as_slice()), + [field_disp].as_slice(), + Some([self as &dyn StrValue].as_slice()), + Some(self.len()), Some(80), ) } diff --git a/src/daft-core/src/series/ops/between.rs b/src/daft-core/src/series/ops/between.rs index aaf358992c..c54e06f83f 100644 --- a/src/daft-core/src/series/ops/between.rs +++ b/src/daft-core/src/series/ops/between.rs @@ -1,8 +1,10 @@ use common_error::DaftResult; use crate::{ - array::ops::DaftBetween, datatypes::BooleanArray, datatypes::DataType, series::IntoSeries, - series::Series, with_match_numeric_daft_types, + array::ops::DaftBetween, + datatypes::{BooleanArray, DataType, InferDataType}, + series::{IntoSeries, Series}, + with_match_numeric_daft_types, }; #[cfg(feature = "python")] @@ -10,12 +12,12 @@ use crate::series::ops::py_between_op_utilfn; impl Series { pub fn between(&self, lower: &Series, upper: &Series) -> DaftResult { - let (_output_type, _intermediate, lower_comp_type) = - self.data_type().comparison_op(lower.data_type())?; - let (_output_type, _intermediate, upper_comp_type) = - self.data_type().comparison_op(upper.data_type())?; - let (output_type, intermediate, comp_type) = - lower_comp_type.comparison_op(&upper_comp_type)?; + let (_output_type, _intermediate, lower_comp_type) = InferDataType::from(self.data_type()) + .comparison_op(&InferDataType::from(lower.data_type()))?; + let (_output_type, _intermediate, upper_comp_type) = InferDataType::from(self.data_type()) + .comparison_op(&InferDataType::from(upper.data_type()))?; + let (output_type, intermediate, comp_type) = InferDataType::from(&lower_comp_type) + .comparison_op(&InferDataType::from(&upper_comp_type))?; let (it_value, it_lower, it_upper) = if let Some(ref it) = intermediate { (self.cast(it)?, 
lower.cast(it)?, upper.cast(it)?) } else { diff --git a/src/daft-core/src/series/ops/is_in.rs b/src/daft-core/src/series/ops/is_in.rs index e579f93bdd..264b080ce4 100644 --- a/src/daft-core/src/series/ops/is_in.rs +++ b/src/daft-core/src/series/ops/is_in.rs @@ -2,8 +2,7 @@ use common_error::DaftResult; use crate::{ array::ops::DaftIsIn, - datatypes::BooleanArray, - datatypes::DataType, + datatypes::{BooleanArray, DataType, InferDataType}, series::{IntoSeries, Series}, with_match_comparable_daft_types, }; @@ -21,8 +20,8 @@ impl Series { return default(self.name(), self.len()); } - let (output_type, intermediate, comp_type) = - self.data_type().membership_op(items.data_type())?; + let (output_type, intermediate, comp_type) = InferDataType::from(self.data_type()) + .membership_op(&InferDataType::from(items.data_type()))?; let (lhs, rhs) = if let Some(ref it) = intermediate { (self.cast(it)?, items.cast(it)?) diff --git a/src/daft-core/src/series/ops/take.rs b/src/daft-core/src/series/ops/take.rs index 44f4e93303..4f7dfb2a9a 100644 --- a/src/daft-core/src/series/ops/take.rs +++ b/src/daft-core/src/series/ops/take.rs @@ -4,6 +4,7 @@ use crate::{ }; use arrow2::types::IndexRange; +use common_display::table_display::StrValue; use common_error::DaftResult; impl Series { @@ -23,18 +24,20 @@ impl Series { self.inner.take(idx) } - pub fn str_value(&self, idx: usize) -> DaftResult { - self.inner.str_value(idx) - } - pub fn html_value(&self, idx: usize) -> String { self.inner.html_value(idx) } pub fn to_str_values(&self) -> DaftResult { let iter = - IndexRange::new(0i64, self.len() as i64).map(|i| self.str_value(i as usize).ok()); + IndexRange::new(0i64, self.len() as i64).map(|i| Some(self.str_value(i as usize))); let array = Utf8Array::from_iter(self.name(), iter); Ok(array.into_series()) } } + +impl StrValue for Series { + fn str_value(&self, idx: usize) -> String { + self.inner.str_value(idx).unwrap() + } +} diff --git a/src/daft-core/src/utils/display.rs b/src/daft-core/src/utils/display.rs new file mode 100644 index 0000000000..7b1045f5fb --- /dev/null +++ b/src/daft-core/src/utils/display.rs @@ -0,0 +1,104 @@ +use crate::{datatypes::TimeUnit, series::Series}; +use common_display::table_display::StrValue; +use itertools::Itertools; + +pub fn display_date32(val: i32) -> String { + let epoch_date = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); + let date = if val.is_positive() { + epoch_date + chrono::naive::Days::new(val as u64) + } else { + epoch_date - chrono::naive::Days::new(val.unsigned_abs() as u64) + }; + format!("{date}") +} + +pub fn display_time64(val: i64, unit: &TimeUnit) -> String { + let time = match unit { + TimeUnit::Nanoseconds => Ok(chrono::NaiveTime::from_num_seconds_from_midnight_opt( + (val / 1_000_000_000) as u32, + (val % 1_000_000_000) as u32, + ) + .unwrap()), + TimeUnit::Microseconds => Ok(chrono::NaiveTime::from_num_seconds_from_midnight_opt( + (val / 1_000_000) as u32, + ((val % 1_000_000) * 1_000) as u32, + ) + .unwrap()), + TimeUnit::Milliseconds => { + let seconds = u32::try_from(val / 1_000); + let nanoseconds = u32::try_from((val % 1_000) * 1_000_000); + match (seconds, nanoseconds) { + (Ok(secs), Ok(nano)) => { + Ok(chrono::NaiveTime::from_num_seconds_from_midnight_opt(secs, nano).unwrap()) + } + (Err(e), _) => Err(e), + (_, Err(e)) => Err(e), + } + } + TimeUnit::Seconds => { + let seconds = u32::try_from(val); + match seconds { + Ok(secs) => { + Ok(chrono::NaiveTime::from_num_seconds_from_midnight_opt(secs, 0).unwrap()) + } + Err(e) => Err(e), + } + } + 
}; + + match time { + Ok(time) => format!("{time}"), + Err(e) => format!("Display Error: {e}"), + } +} + +pub fn display_timestamp(val: i64, unit: &TimeUnit, timezone: &Option) -> String { + use crate::array::ops::cast::{ + timestamp_to_str_naive, timestamp_to_str_offset, timestamp_to_str_tz, + }; + + timezone.as_ref().map_or_else( + || timestamp_to_str_naive(val, unit), + |timezone| { + // In arrow, timezone string can be either: + // 1. a fixed offset "-07:00", parsed using parse_offset, or + // 2. a timezone name e.g. "America/Los_Angeles", parsed using parse_offset_tz. + if let Ok(offset) = arrow2::temporal_conversions::parse_offset(timezone) { + timestamp_to_str_offset(val, unit, &offset) + } else if let Ok(tz) = arrow2::temporal_conversions::parse_offset_tz(timezone) { + timestamp_to_str_tz(val, unit, &tz) + } else { + panic!("Unable to parse timezone string {}", timezone) + } + }, + ) +} + +pub fn display_decimal128(val: i128, _precision: u8, scale: i8) -> String { + if scale < 0 { + unimplemented!(); + } else { + let modulus = i128::pow(10, scale as u32); + let integral = val / modulus; + if scale == 0 { + format!("{}", integral) + } else { + let sign = if val < 0 { "-" } else { "" }; + let integral = integral.abs(); + let decimals = (val % modulus).abs(); + let scale = scale as usize; + format!("{}{}.{:0scale$}", sign, integral, decimals) + } + } +} + +pub fn display_series_literal(series: &Series) -> String { + if !series.is_empty() { + format!( + "[{}]", + (0..series.len()).map(|i| series.str_value(i)).join(", ") + ) + } else { + "[]".to_string() + } +} diff --git a/src/daft-core/src/utils/dyn_compare.rs b/src/daft-core/src/utils/dyn_compare.rs index 9e6f82595e..d9903da56f 100644 --- a/src/daft-core/src/utils/dyn_compare.rs +++ b/src/daft-core/src/utils/dyn_compare.rs @@ -1,6 +1,8 @@ use std::cmp::Ordering; -use crate::{datatypes::DataType, schema::Schema}; +use crate::datatypes::DataType; + +use daft_schema::schema::Schema; use arrow2::array::Array; use common_error::DaftError; diff --git a/src/daft-core/src/utils/mod.rs b/src/daft-core/src/utils/mod.rs index 7aa5057e28..b270516ebd 100644 --- a/src/daft-core/src/utils/mod.rs +++ b/src/daft-core/src/utils/mod.rs @@ -1,5 +1,5 @@ pub mod arrow; -pub mod display_table; +pub mod display; pub mod dyn_compare; pub mod identity_hash_set; pub mod supertype; diff --git a/src/daft-csv/src/metadata.rs b/src/daft-csv/src/metadata.rs index 344af13210..2625643b68 100644 --- a/src/daft-csv/src/metadata.rs +++ b/src/daft-csv/src/metadata.rs @@ -4,7 +4,7 @@ use arrow2::io::csv::read_async::{AsyncReader, AsyncReaderBuilder}; use async_compat::CompatExt; use common_error::DaftResult; use csv_async::ByteRecord; -use daft_core::schema::Schema; +use daft_core::prelude::Schema; use daft_io::{get_runtime, GetResult, IOClient, IOStatsRef}; use futures::{StreamExt, TryStreamExt}; use snafu::ResultExt; diff --git a/src/daft-csv/src/options.rs b/src/daft-csv/src/options.rs index 17106035f9..28ab48c573 100644 --- a/src/daft-csv/src/options.rs +++ b/src/daft-csv/src/options.rs @@ -1,10 +1,10 @@ use common_py_serde::impl_bincode_py_state_serialization; -use daft_core::schema::SchemaRef; +use daft_core::prelude::SchemaRef; use daft_dsl::ExprRef; use serde::{Deserialize, Serialize}; #[cfg(feature = "python")] use { - daft_core::python::schema::PySchema, + daft_core::python::PySchema, daft_dsl::python::PyExpr, pyo3::{pyclass, pyclass::CompareOp, pymethods, PyObject, PyResult, Python}, }; diff --git a/src/daft-csv/src/python.rs b/src/daft-csv/src/python.rs index 
229f4ada65..591ed8cb90 100644 --- a/src/daft-csv/src/python.rs +++ b/src/daft-csv/src/python.rs @@ -1,7 +1,7 @@ pub mod pylib { use std::sync::Arc; - use daft_core::python::schema::PySchema; + use daft_core::python::PySchema; use daft_io::{get_io_client, python::IOConfig, IOStatsContext}; use daft_table::python::PyTable; use pyo3::{pyfunction, PyResult, Python}; diff --git a/src/daft-csv/src/read.rs b/src/daft-csv/src/read.rs index 618f2e6729..fed74d88e1 100644 --- a/src/daft-csv/src/read.rs +++ b/src/daft-csv/src/read.rs @@ -593,7 +593,7 @@ fn parse_into_column_array_chunk_stream( .iter() .map(|i| fields.get(*i).unwrap().into()) .collect::>(); - let read_schema = Arc::new(daft_core::schema::Schema::new(fields_subset)?); + let read_schema = Arc::new(daft_core::prelude::Schema::new(fields_subset)?); let read_daft_fields = Arc::new( read_schema .fields diff --git a/src/daft-dsl/src/expr.rs b/src/daft-dsl/src/expr.rs index ec48ab796e..5b8bf4ffc7 100644 --- a/src/daft-dsl/src/expr.rs +++ b/src/daft-dsl/src/expr.rs @@ -1,7 +1,7 @@ use common_hashable_float_wrapper::FloatWrapper; use common_treenode::TreeNode; use daft_core::{ - datatypes::{try_mean_supertype, try_sum_supertype}, + datatypes::{try_mean_supertype, try_sum_supertype, InferDataType}, prelude::*, utils::supertype::try_get_supertype, }; @@ -730,7 +730,8 @@ impl Expr { let left_field = left.to_field(schema)?; let right_field = right.to_field(schema)?; let (result_type, _intermediate, _comp_type) = - left_field.dtype.membership_op(&right_field.dtype)?; + InferDataType::from(&left_field.dtype) + .membership_op(&InferDataType::from(&right_field.dtype))?; Ok(Field::new(left_field.name.as_str(), result_type)) } Between(value, lower, upper) => { @@ -738,11 +739,14 @@ impl Expr { let lower_field = lower.to_field(schema)?; let upper_field = upper.to_field(schema)?; let (lower_result_type, _intermediate, _comp_type) = - value_field.dtype.membership_op(&lower_field.dtype)?; + InferDataType::from(&value_field.dtype) + .membership_op(&InferDataType::from(&lower_field.dtype))?; let (upper_result_type, _intermediate, _comp_type) = - value_field.dtype.membership_op(&upper_field.dtype)?; + InferDataType::from(&value_field.dtype) + .membership_op(&InferDataType::from(&upper_field.dtype))?; let (result_type, _intermediate, _comp_type) = - lower_result_type.membership_op(&upper_result_type)?; + InferDataType::from(&lower_result_type) + .membership_op(&InferDataType::from(&upper_result_type))?; Ok(Field::new(value_field.name.as_str(), result_type)) } Literal(value) => Ok(Field::new("literal", value.get_type())), @@ -756,7 +760,8 @@ impl Expr { match op { // Logical operations Operator::And | Operator::Or | Operator::Xor => { - let result_type = left_field.dtype.logical_op(&right_field.dtype)?; + let result_type = InferDataType::from(&left_field.dtype) + .logical_op(&InferDataType::from(&right_field.dtype))?; Ok(Field::new(left_field.name.as_str(), result_type)) } @@ -768,37 +773,45 @@ impl Expr { | Operator::LtEq | Operator::GtEq => { let (result_type, _intermediate, _comp_type) = - left_field.dtype.comparison_op(&right_field.dtype)?; + InferDataType::from(&left_field.dtype) + .comparison_op(&InferDataType::from(&right_field.dtype))?; Ok(Field::new(left_field.name.as_str(), result_type)) } // Arithmetic operations Operator::Plus => { - let result_type = (&left_field.dtype + &right_field.dtype)?; + let result_type = (InferDataType::from(&left_field.dtype) + + InferDataType::from(&right_field.dtype))?; Ok(Field::new(left_field.name.as_str(), result_type)) 
} Operator::Minus => { - let result_type = (&left_field.dtype - &right_field.dtype)?; + let result_type = (InferDataType::from(&left_field.dtype) + - InferDataType::from(&right_field.dtype))?; Ok(Field::new(left_field.name.as_str(), result_type)) } Operator::Multiply => { - let result_type = (&left_field.dtype * &right_field.dtype)?; + let result_type = (InferDataType::from(&left_field.dtype) + * InferDataType::from(&right_field.dtype))?; Ok(Field::new(left_field.name.as_str(), result_type)) } Operator::TrueDivide => { - let result_type = (&left_field.dtype / &right_field.dtype)?; + let result_type = (InferDataType::from(&left_field.dtype) + / InferDataType::from(&right_field.dtype))?; Ok(Field::new(left_field.name.as_str(), result_type)) } Operator::Modulus => { - let result_type = (&left_field.dtype % &right_field.dtype)?; + let result_type = (InferDataType::from(&left_field.dtype) + % InferDataType::from(&right_field.dtype))?; Ok(Field::new(left_field.name.as_str(), result_type)) } Operator::ShiftLeft => { - let result_type = (&left_field.dtype << &right_field.dtype)?; + let result_type = (InferDataType::from(&left_field.dtype) + << InferDataType::from(&right_field.dtype))?; Ok(Field::new(left_field.name.as_str(), result_type)) } Operator::ShiftRight => { - let result_type = (&left_field.dtype >> &right_field.dtype)?; + let result_type = (InferDataType::from(&left_field.dtype) + >> InferDataType::from(&right_field.dtype))?; Ok(Field::new(left_field.name.as_str(), result_type)) } Operator::FloorDivide => { diff --git a/src/daft-dsl/src/functions/float/fill_nan.rs b/src/daft-dsl/src/functions/float/fill_nan.rs index 9c6c44a93e..c9417a0552 100644 --- a/src/daft-dsl/src/functions/float/fill_nan.rs +++ b/src/daft-dsl/src/functions/float/fill_nan.rs @@ -1,6 +1,4 @@ -use daft_core::{ - datatypes::Field, schema::Schema, series::Series, utils::supertype::try_get_supertype, -}; +use daft_core::{prelude::*, utils::supertype::try_get_supertype}; use crate::ExprRef; diff --git a/src/daft-dsl/src/functions/list/chunk.rs b/src/daft-dsl/src/functions/list/chunk.rs index a12c3f6bbf..2d1676a24e 100644 --- a/src/daft-dsl/src/functions/list/chunk.rs +++ b/src/daft-dsl/src/functions/list/chunk.rs @@ -1,5 +1,5 @@ use crate::ExprRef; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use super::{super::FunctionEvaluator, ListExpr}; use crate::functions::FunctionExpr; diff --git a/src/daft-dsl/src/functions/list/explode.rs b/src/daft-dsl/src/functions/list/explode.rs index 4298599c97..e3c9716b01 100644 --- a/src/daft-dsl/src/functions/list/explode.rs +++ b/src/daft-dsl/src/functions/list/explode.rs @@ -1,7 +1,7 @@ use crate::functions::FunctionExpr; use crate::ExprRef; use common_error::{DaftError, DaftResult}; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; diff --git a/src/daft-dsl/src/functions/list/get.rs b/src/daft-dsl/src/functions/list/get.rs index 1431aa4cd4..69543bb5cd 100644 --- a/src/daft-dsl/src/functions/list/get.rs +++ b/src/daft-dsl/src/functions/list/get.rs @@ -1,5 +1,5 @@ use crate::ExprRef; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; diff --git a/src/daft-dsl/src/functions/list/max.rs b/src/daft-dsl/src/functions/list/max.rs index a85c4b5b1c..b6658b4d67 100644 --- a/src/daft-dsl/src/functions/list/max.rs +++ 
b/src/daft-dsl/src/functions/list/max.rs @@ -1,5 +1,5 @@ use crate::ExprRef; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; diff --git a/src/daft-dsl/src/functions/list/min.rs b/src/daft-dsl/src/functions/list/min.rs index 8bf4db1d7f..41bda23146 100644 --- a/src/daft-dsl/src/functions/list/min.rs +++ b/src/daft-dsl/src/functions/list/min.rs @@ -1,5 +1,5 @@ use crate::ExprRef; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use crate::functions::FunctionExpr; use common_error::{DaftError, DaftResult}; diff --git a/src/daft-dsl/src/functions/list/slice.rs b/src/daft-dsl/src/functions/list/slice.rs index 295beaaf54..e03bc0a173 100644 --- a/src/daft-dsl/src/functions/list/slice.rs +++ b/src/daft-dsl/src/functions/list/slice.rs @@ -1,5 +1,5 @@ use crate::ExprRef; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; use crate::functions::FunctionExpr; diff --git a/src/daft-dsl/src/functions/mod.rs b/src/daft-dsl/src/functions/mod.rs index f66512e853..435cfd38e4 100644 --- a/src/daft-dsl/src/functions/mod.rs +++ b/src/daft-dsl/src/functions/mod.rs @@ -28,8 +28,7 @@ use self::utf8::Utf8Expr; pub use scalar::*; use common_error::DaftResult; -use daft_core::datatypes::FieldID; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use serde::{Deserialize, Serialize}; diff --git a/src/daft-dsl/src/functions/numeric/abs.rs b/src/daft-dsl/src/functions/numeric/abs.rs index 20e7239717..5dfe92a321 100644 --- a/src/daft-dsl/src/functions/numeric/abs.rs +++ b/src/daft-dsl/src/functions/numeric/abs.rs @@ -1,5 +1,5 @@ use common_error::{DaftError, DaftResult}; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use crate::functions::FunctionExpr; use crate::ExprRef; diff --git a/src/daft-dsl/src/functions/numeric/ceil.rs b/src/daft-dsl/src/functions/numeric/ceil.rs index df32e0d98a..1be899a0b2 100644 --- a/src/daft-dsl/src/functions/numeric/ceil.rs +++ b/src/daft-dsl/src/functions/numeric/ceil.rs @@ -1,5 +1,5 @@ use common_error::{DaftError, DaftResult}; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use crate::functions::FunctionExpr; use crate::ExprRef; diff --git a/src/daft-dsl/src/functions/numeric/floor.rs b/src/daft-dsl/src/functions/numeric/floor.rs index 21d6a2a863..1435c5c674 100644 --- a/src/daft-dsl/src/functions/numeric/floor.rs +++ b/src/daft-dsl/src/functions/numeric/floor.rs @@ -1,5 +1,5 @@ use common_error::{DaftError, DaftResult}; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; use crate::functions::FunctionExpr; diff --git a/src/daft-dsl/src/functions/numeric/round.rs b/src/daft-dsl/src/functions/numeric/round.rs index 7bd079a4b2..2724aa586c 100644 --- a/src/daft-dsl/src/functions/numeric/round.rs +++ b/src/daft-dsl/src/functions/numeric/round.rs @@ -1,5 +1,5 @@ use common_error::{DaftError, DaftResult}; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; use super::NumericExpr; diff --git a/src/daft-dsl/src/functions/numeric/sign.rs b/src/daft-dsl/src/functions/numeric/sign.rs index 0137701ac3..29dc883bb2 100644 --- 
a/src/daft-dsl/src/functions/numeric/sign.rs +++ b/src/daft-dsl/src/functions/numeric/sign.rs @@ -1,5 +1,5 @@ use common_error::{DaftError, DaftResult}; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; use crate::functions::FunctionExpr; diff --git a/src/daft-dsl/src/functions/numeric/sqrt.rs b/src/daft-dsl/src/functions/numeric/sqrt.rs index cac59e27ff..6af0380490 100644 --- a/src/daft-dsl/src/functions/numeric/sqrt.rs +++ b/src/daft-dsl/src/functions/numeric/sqrt.rs @@ -1,5 +1,5 @@ use common_error::{DaftError, DaftResult}; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; use crate::ExprRef; diff --git a/src/daft-dsl/src/functions/python/udf.rs b/src/daft-dsl/src/functions/python/udf.rs index fed06ab07f..5cb99967d2 100644 --- a/src/daft-dsl/src/functions/python/udf.rs +++ b/src/daft-dsl/src/functions/python/udf.rs @@ -3,7 +3,7 @@ use daft_core::datatypes::DataType; #[cfg(feature = "python")] use pyo3::{types::PyModule, PyAny, PyResult}; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use crate::ExprRef; @@ -60,7 +60,8 @@ fn run_udf( return_dtype: &DataType, batch_size: Option, ) -> DaftResult { - use daft_core::python::{PyDataType, PySeries}; + use daft_core::python::PyDataType; + use daft_core::python::PySeries; // Convert input Rust &[Series] to wrapped Python Vec<&PyAny> let py_series_module = PyModule::import(py, pyo3::intern!(py, "daft.series"))?; diff --git a/src/daft-dsl/src/functions/scalar.rs b/src/daft-dsl/src/functions/scalar.rs index f1cf1c5fad..4a77454f34 100644 --- a/src/daft-dsl/src/functions/scalar.rs +++ b/src/daft-dsl/src/functions/scalar.rs @@ -3,8 +3,7 @@ use std::fmt::{Display, Formatter}; use std::sync::Arc; use common_error::DaftResult; -use daft_core::datatypes::FieldID; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use crate::{Expr, ExprRef}; diff --git a/src/daft-dsl/src/functions/sketch/percentile.rs b/src/daft-dsl/src/functions/sketch/percentile.rs index c9ff835725..6102bf1d15 100644 --- a/src/daft-dsl/src/functions/sketch/percentile.rs +++ b/src/daft-dsl/src/functions/sketch/percentile.rs @@ -1,5 +1,5 @@ use common_error::{DaftError, DaftResult}; -use daft_core::{datatypes::DataType, datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use super::super::FunctionEvaluator; use super::SketchExpr; diff --git a/src/daft-dsl/src/functions/temporal/time.rs b/src/daft-dsl/src/functions/temporal/time.rs index 651c34c98f..4da0da2d9e 100644 --- a/src/daft-dsl/src/functions/temporal/time.rs +++ b/src/daft-dsl/src/functions/temporal/time.rs @@ -1,9 +1,5 @@ use common_error::{DaftError, DaftResult}; -use daft_core::{ - datatypes::{DataType, Field, TimeUnit}, - schema::Schema, - series::Series, -}; +use daft_core::prelude::*; use crate::functions::FunctionExpr; use crate::ExprRef; diff --git a/src/daft-dsl/src/functions/temporal/truncate.rs b/src/daft-dsl/src/functions/temporal/truncate.rs index df1f08c10d..1e0aebcdf2 100644 --- a/src/daft-dsl/src/functions/temporal/truncate.rs +++ b/src/daft-dsl/src/functions/temporal/truncate.rs @@ -1,4 +1,4 @@ -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use crate::ExprRef; diff --git a/src/daft-dsl/src/functions/utf8/length_bytes.rs b/src/daft-dsl/src/functions/utf8/length_bytes.rs index 
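Most of the import hunks above collapse per-module paths like `daft_core::{datatypes::Field, schema::Schema, series::Series}` into a single `daft_core::prelude::*`. A small self-contained sketch of the prelude pattern being leaned on here, with hypothetical module contents; the real set of names re-exported by daft-core's prelude is not spelled out in this diff.

// Hypothetical crate layout illustrating why call sites shrink to one import.
mod datatypes {
    #[derive(Debug)]
    pub struct Field {
        pub name: String,
    }
}

mod schema {
    #[derive(Debug)]
    pub struct Schema {
        pub fields: Vec<crate::datatypes::Field>,
    }
}

// The prelude simply re-exports the handful of names most call sites need.
mod prelude {
    pub use crate::datatypes::Field;
    pub use crate::schema::Schema;
}

// One glob import replaces the multi-line use tree seen in the old code.
use crate::prelude::*;

fn main() {
    let schema = Schema {
        fields: vec![Field { name: "a".to_string() }],
    };
    println!("{schema:?}");
}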
ca58798150..ec6532edc8 100644 --- a/src/daft-dsl/src/functions/utf8/length_bytes.rs +++ b/src/daft-dsl/src/functions/utf8/length_bytes.rs @@ -1,8 +1,4 @@ -use daft_core::{ - datatypes::{DataType, Field}, - schema::Schema, - series::Series, -}; +use daft_core::prelude::*; use crate::functions::FunctionExpr; use crate::ExprRef; diff --git a/src/daft-dsl/src/functions/utf8/to_datetime.rs b/src/daft-dsl/src/functions/utf8/to_datetime.rs index d4e281cc5e..b92a58aaa9 100644 --- a/src/daft-dsl/src/functions/utf8/to_datetime.rs +++ b/src/daft-dsl/src/functions/utf8/to_datetime.rs @@ -3,7 +3,7 @@ use crate::ExprRef; use common_error::{DaftError, DaftResult}; use daft_core::prelude::*; -use daft_core::datatypes::utils::infer_timeunit_from_format_string; +use daft_core::datatypes::infer_timeunit_from_format_string; use super::{super::FunctionEvaluator, Utf8Expr}; diff --git a/src/daft-dsl/src/lit.rs b/src/daft-dsl/src/lit.rs index 0dde81734d..b85d8804af 100644 --- a/src/daft-dsl/src/lit.rs +++ b/src/daft-dsl/src/lit.rs @@ -5,7 +5,7 @@ use common_error::{DaftError, DaftResult}; use common_hashable_float_wrapper::FloatWrapper; use daft_core::{ prelude::*, - utils::display_table::{ + utils::display::{ display_date32, display_decimal128, display_series_literal, display_time64, display_timestamp, }, diff --git a/src/daft-dsl/src/python.rs b/src/daft-dsl/src/python.rs index e9309bc6c1..d3bb2cfdde 100644 --- a/src/daft-dsl/src/python.rs +++ b/src/daft-dsl/src/python.rs @@ -8,15 +8,12 @@ use common_error::DaftError; use common_py_serde::impl_bincode_py_state_serialization; use common_resource_request::ResourceRequest; use daft_core::array::ops::Utf8NormalizeOptions; -use daft_core::python::datatype::PyTimeUnit; use daft_core::python::PySeries; +use daft_core::python::{PyDataType, PyField, PySchema, PyTimeUnit}; use serde::{Deserialize, Serialize}; use crate::{functions, Expr, ExprRef, LiteralValue}; -use daft_core::{ - prelude::*, - python::{datatype::PyDataType, field::PyField, schema::PySchema}, -}; +use daft_core::prelude::*; use pyo3::{ exceptions::PyValueError, diff --git a/src/daft-functions/src/image/crop.rs b/src/daft-functions/src/image/crop.rs index d7ed7d6665..e687174339 100644 --- a/src/daft-functions/src/image/crop.rs +++ b/src/daft-functions/src/image/crop.rs @@ -1,6 +1,5 @@ use common_error::DaftError; -use daft_core::datatypes::DataType; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use common_error::DaftResult; use daft_dsl::functions::{ScalarFunction, ScalarUDF}; diff --git a/src/daft-functions/src/image/decode.rs b/src/daft-functions/src/image/decode.rs index 0229102ea5..121c50555e 100644 --- a/src/daft-functions/src/image/decode.rs +++ b/src/daft-functions/src/image/decode.rs @@ -1,8 +1,4 @@ -use daft_core::{ - datatypes::{DataType, Field, ImageMode}, - schema::Schema, - series::Series, -}; +use daft_core::prelude::*; use common_error::{DaftError, DaftResult}; use daft_dsl::{ diff --git a/src/daft-functions/src/image/encode.rs b/src/daft-functions/src/image/encode.rs index cff7bc9f57..9b42c14d87 100644 --- a/src/daft-functions/src/image/encode.rs +++ b/src/daft-functions/src/image/encode.rs @@ -1,8 +1,4 @@ -use daft_core::{ - datatypes::{DataType, Field, ImageFormat}, - schema::Schema, - series::Series, -}; +use daft_core::prelude::*; use common_error::{DaftError, DaftResult}; use daft_dsl::{ diff --git a/src/daft-functions/src/image/resize.rs b/src/daft-functions/src/image/resize.rs index d71a46a30c..24c0bf0717 100644 --- 
a/src/daft-functions/src/image/resize.rs +++ b/src/daft-functions/src/image/resize.rs @@ -1,6 +1,5 @@ use common_error::DaftError; -use daft_core::datatypes::DataType; -use daft_core::{datatypes::Field, schema::Schema, series::Series}; +use daft_core::prelude::*; use common_error::DaftResult; use daft_dsl::functions::{ScalarFunction, ScalarUDF}; diff --git a/src/daft-functions/src/image/to_mode.rs b/src/daft-functions/src/image/to_mode.rs index 625bb5f71b..4648c1b04a 100644 --- a/src/daft-functions/src/image/to_mode.rs +++ b/src/daft-functions/src/image/to_mode.rs @@ -1,8 +1,4 @@ -use daft_core::{ - datatypes::{DataType, Field, ImageMode}, - schema::Schema, - series::Series, -}; +use daft_core::prelude::*; use common_error::{DaftError, DaftResult}; use daft_dsl::{ diff --git a/src/daft-json/src/options.rs b/src/daft-json/src/options.rs index feeae7559a..be045e16fa 100644 --- a/src/daft-json/src/options.rs +++ b/src/daft-json/src/options.rs @@ -1,10 +1,10 @@ use common_py_serde::impl_bincode_py_state_serialization; -use daft_core::schema::SchemaRef; +use daft_core::prelude::SchemaRef; use daft_dsl::ExprRef; use serde::{Deserialize, Serialize}; #[cfg(feature = "python")] use { - daft_core::python::schema::PySchema, + daft_core::python::PySchema, daft_dsl::python::PyExpr, pyo3::{pyclass, pyclass::CompareOp, pymethods, PyObject, PyResult, Python}, }; diff --git a/src/daft-json/src/python.rs b/src/daft-json/src/python.rs index 139ba4a531..d09912aca9 100644 --- a/src/daft-json/src/python.rs +++ b/src/daft-json/src/python.rs @@ -1,7 +1,7 @@ pub mod pylib { use std::sync::Arc; - use daft_core::python::schema::PySchema; + use daft_core::python::PySchema; use daft_io::{get_io_client, python::IOConfig, IOStatsContext}; use daft_table::python::PyTable; use pyo3::{pyfunction, PyResult, Python}; diff --git a/src/daft-json/src/read.rs b/src/daft-json/src/read.rs index 7ca307a0c7..ed9553179d 100644 --- a/src/daft-json/src/read.rs +++ b/src/daft-json/src/read.rs @@ -503,7 +503,7 @@ fn parse_into_column_array_chunk_stream( schema: Arc, schema_is_projection: bool, ) -> DaftResult { - let daft_schema = Arc::new(daft_core::schema::Schema::try_from(schema.as_ref())?); + let daft_schema = Arc::new(daft_core::prelude::Schema::try_from(schema.as_ref())?); let daft_fields = Arc::new( daft_schema .fields diff --git a/src/daft-json/src/schema.rs b/src/daft-json/src/schema.rs index 6b66d663f3..6d676a5603 100644 --- a/src/daft-json/src/schema.rs +++ b/src/daft-json/src/schema.rs @@ -1,7 +1,7 @@ use std::{collections::HashSet, sync::Arc}; use common_error::DaftResult; -use daft_core::schema::Schema; +use daft_core::prelude::Schema; use daft_io::{get_runtime, GetResult, IOClient, IOStatsRef}; use futures::{StreamExt, TryStreamExt}; use indexmap::IndexMap; diff --git a/src/daft-local-execution/src/pipeline.rs b/src/daft-local-execution/src/pipeline.rs index f182b9f9cc..8e39a1e704 100644 --- a/src/daft-local-execution/src/pipeline.rs +++ b/src/daft-local-execution/src/pipeline.rs @@ -18,11 +18,10 @@ use crate::{ use common_display::{mermaid::MermaidDisplayVisitor, tree::TreeDisplay}; use common_error::DaftResult; -use daft_core::{ - datatypes::Field, - schema::{Schema, SchemaRef}, - utils::supertype, -}; +use daft_core::{datatypes::Field, utils::supertype}; + +use daft_core::prelude::{Schema, SchemaRef}; + use daft_dsl::Expr; use daft_micropartition::MicroPartition; use daft_physical_plan::{ diff --git a/src/daft-local-execution/src/sinks/hash_join_build.rs b/src/daft-local-execution/src/sinks/hash_join_build.rs index 
b7940d4ebe..d5ca26f713 100644 --- a/src/daft-local-execution/src/sinks/hash_join_build.rs +++ b/src/daft-local-execution/src/sinks/hash_join_build.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use crate::pipeline::PipelineResultType; use common_error::DaftResult; -use daft_core::schema::SchemaRef; +use daft_core::prelude::SchemaRef; use daft_dsl::ExprRef; use daft_micropartition::MicroPartition; diff --git a/src/daft-micropartition/src/micropartition.rs b/src/daft-micropartition/src/micropartition.rs index ad40f2e514..df873469fa 100644 --- a/src/daft-micropartition/src/micropartition.rs +++ b/src/daft-micropartition/src/micropartition.rs @@ -1157,7 +1157,7 @@ pub(crate) fn read_parquet_into_micropartition( .iter() .map(|m| { let schema = infer_schema_with_options(m, &Some((*schema_infer_options).into()))?; - let daft_schema = daft_core::schema::Schema::try_from(&schema)?; + let daft_schema = daft_core::prelude::Schema::try_from(&schema)?; DaftResult::Ok(Arc::new(daft_schema)) }) .collect::>>()?; @@ -1181,7 +1181,7 @@ pub(crate) fn read_parquet_into_micropartition( .iter() .map(|m| { let schema = infer_schema_with_options(m, &Some((*schema_infer_options).into()))?; - let daft_schema = daft_core::schema::Schema::try_from(&schema)?; + let daft_schema = daft_core::prelude::Schema::try_from(&schema)?; DaftResult::Ok(Arc::new(daft_schema)) }) .collect::>>()?; diff --git a/src/daft-micropartition/src/ops/cast_to_schema.rs b/src/daft-micropartition/src/ops/cast_to_schema.rs index a09cc885db..e83c0774e1 100644 --- a/src/daft-micropartition/src/ops/cast_to_schema.rs +++ b/src/daft-micropartition/src/ops/cast_to_schema.rs @@ -1,7 +1,7 @@ use std::{ops::Deref, sync::Arc}; use common_error::DaftResult; -use daft_core::schema::SchemaRef; +use daft_core::prelude::SchemaRef; use daft_scan::ScanTask; use crate::micropartition::{MicroPartition, TableState}; diff --git a/src/daft-micropartition/src/ops/eval_expressions.rs b/src/daft-micropartition/src/ops/eval_expressions.rs index 2d6c2d44b0..8479b560b7 100644 --- a/src/daft-micropartition/src/ops/eval_expressions.rs +++ b/src/daft-micropartition/src/ops/eval_expressions.rs @@ -1,7 +1,7 @@ use std::{collections::HashSet, sync::Arc}; use common_error::{DaftError, DaftResult}; -use daft_core::schema::Schema; +use daft_core::prelude::Schema; use daft_dsl::ExprRef; use daft_io::IOStatsContext; use snafu::ResultExt; diff --git a/src/daft-micropartition/src/python.rs b/src/daft-micropartition/src/python.rs index a5f75f1403..86bec613bb 100644 --- a/src/daft-micropartition/src/python.rs +++ b/src/daft-micropartition/src/python.rs @@ -4,10 +4,11 @@ use std::{ }; use common_error::DaftResult; -use daft_core::{ - prelude::*, - python::{datatype::PyTimeUnit, schema::PySchema, PySeries}, -}; +use daft_core::prelude::*; + +use daft_core::python::PySeries; +use daft_core::python::{PySchema, PyTimeUnit}; + use daft_csv::{CsvConvertOptions, CsvParseOptions, CsvReadOptions}; use daft_dsl::python::PyExpr; use daft_io::{python::IOConfig, IOStatsContext}; diff --git a/src/daft-parquet/Cargo.toml b/src/daft-parquet/Cargo.toml index 90dff06c75..3e1a4876b8 100644 --- a/src/daft-parquet/Cargo.toml +++ b/src/daft-parquet/Cargo.toml @@ -3,6 +3,7 @@ arrow2 = {workspace = true, features = ["io_parquet", "io_parquet_compression"]} async-compat = {workspace = true} async-stream = {workspace = true} bytes = {workspace = true} +common-arrow-ffi = {path = "../common/arrow-ffi", default-features = false} common-error = {path = "../common/error", default-features = false} crossbeam-channel = "0.5.1" 
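The reader hunks above (daft-json and the micropartition parquet path) now reach the Daft schema type through `daft_core::prelude::Schema` when turning an inferred arrow2 schema into a Daft one. A hedged sketch of that conversion, assuming `Schema: TryFrom<&arrow2::datatypes::Schema>` with a `DaftError` error type as the hunks imply; the concrete fields below are made up.

use std::sync::Arc;

use arrow2::datatypes::{DataType as ArrowType, Field as ArrowField, Schema as ArrowSchema};
use common_error::DaftResult;
use daft_core::prelude::Schema;

fn arrow_to_daft_schema(arrow_schema: &ArrowSchema) -> DaftResult<Arc<Schema>> {
    // Same conversion the read paths above perform on each inferred file schema.
    let daft_schema = Schema::try_from(arrow_schema)?;
    Ok(Arc::new(daft_schema))
}

fn main() -> DaftResult<()> {
    // Hypothetical schema standing in for the output of schema inference.
    let arrow_schema = ArrowSchema::from(vec![
        ArrowField::new("id", ArrowType::Int64, false),
        ArrowField::new("name", ArrowType::Utf8, true),
    ]);
    let daft_schema = arrow_to_daft_schema(&arrow_schema)?;
    println!("{}", daft_schema);
    Ok(())
}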
daft-core = {path = "../daft-core", default-features = false} @@ -27,7 +28,7 @@ tokio-util = {workspace = true} bincode = {workspace = true} [features] -python = ["dep:pyo3", "common-error/python", "daft-core/python", "daft-io/python", "daft-table/python", "daft-stats/python", "daft-dsl/python"] +python = ["dep:pyo3", "common-error/python", "daft-core/python", "daft-io/python", "daft-table/python", "daft-stats/python", "daft-dsl/python", "common-arrow-ffi/python"] [package] edition = {workspace = true} diff --git a/src/daft-parquet/src/file.rs b/src/daft-parquet/src/file.rs index f7dbdfa61f..57e7649ed0 100644 --- a/src/daft-parquet/src/file.rs +++ b/src/daft-parquet/src/file.rs @@ -400,7 +400,7 @@ impl ParquetFileReader { original_columns: Option>, original_num_rows: Option, ) -> DaftResult>> { - let daft_schema = Arc::new(daft_core::schema::Schema::try_from( + let daft_schema = Arc::new(daft_core::prelude::Schema::try_from( self.arrow_schema.as_ref(), )?); @@ -720,7 +720,7 @@ impl ParquetFileReader { })? .into_iter() .collect::>>()?; - let daft_schema = daft_core::schema::Schema::try_from(self.arrow_schema.as_ref())?; + let daft_schema = daft_core::prelude::Schema::try_from(self.arrow_schema.as_ref())?; Table::new_with_size( daft_schema, diff --git a/src/daft-parquet/src/python.rs b/src/daft-parquet/src/python.rs index afa93b9019..c22e6da97d 100644 --- a/src/daft-parquet/src/python.rs +++ b/src/daft-parquet/src/python.rs @@ -1,10 +1,9 @@ use pyo3::prelude::*; pub mod pylib { - use daft_core::{ - ffi::field_to_py, - python::{datatype::PyTimeUnit, schema::PySchema, PySeries}, - }; + use common_arrow_ffi::{field_to_py, to_py_array}; + use daft_core::python::PySeries; + use daft_core::python::{PySchema, PyTimeUnit}; use daft_dsl::python::PyExpr; use daft_io::{get_io_client, python::IOConfig, IOStatsContext}; use daft_table::python::PyTable; @@ -12,7 +11,6 @@ pub mod pylib { use std::{collections::BTreeMap, sync::Arc}; use crate::read::{ArrowChunk, ParquetSchemaInferenceOptions}; - use daft_core::ffi::to_py_array; #[allow(clippy::too_many_arguments)] #[pyfunction] pub fn read_parquet( diff --git a/src/daft-parquet/src/statistics/table_stats.rs b/src/daft-parquet/src/statistics/table_stats.rs index f560c8208a..ddc306c4ca 100644 --- a/src/daft-parquet/src/statistics/table_stats.rs +++ b/src/daft-parquet/src/statistics/table_stats.rs @@ -1,5 +1,5 @@ use common_error::DaftResult; -use daft_core::schema::Schema; +use daft_core::prelude::Schema; use daft_stats::{ColumnRangeStatistics, TableStatistics}; use snafu::ResultExt; diff --git a/src/daft-plan/Cargo.toml b/src/daft-plan/Cargo.toml index 9eb36c326a..b33c4aea28 100644 --- a/src/daft-plan/Cargo.toml +++ b/src/daft-plan/Cargo.toml @@ -24,6 +24,7 @@ daft-core = {path = "../daft-core", default-features = false} daft-dsl = {path = "../daft-dsl", default-features = false} daft-io = {path = "../daft-io", default-features = false} daft-scan = {path = "../daft-scan", default-features = false} +daft-schema = {path = "../daft-schema", default-features = false} daft-table = {path = "../daft-table", default-features = false} indexmap = {workspace = true} itertools = {workspace = true} @@ -52,7 +53,8 @@ python = [ "daft-io/python", "daft-functions/python", "daft-table/python", - "daft-scan/python" + "daft-scan/python", + "daft-schema/python" ] [package] diff --git a/src/daft-plan/src/builder.rs b/src/daft-plan/src/builder.rs index 9a5719c652..f6dc75d807 100644 --- a/src/daft-plan/src/builder.rs +++ b/src/daft-plan/src/builder.rs @@ -18,10 +18,9 @@ use 
common_daft_config::DaftPlanningConfig; use common_display::mermaid::MermaidDisplayOptions; use common_error::DaftResult; use common_io_config::IOConfig; -use daft_core::{ - join::{JoinStrategy, JoinType}, - schema::{Schema, SchemaRef}, -}; +use daft_core::join::{JoinStrategy, JoinType}; +use daft_schema::schema::{Schema, SchemaRef}; + use daft_dsl::{col, ExprRef}; use daft_io::FileFormat; use daft_scan::{PhysicalScanInfo, Pushdowns, ScanOperatorRef}; @@ -31,9 +30,9 @@ use { crate::sink_info::{CatalogInfo, IcebergCatalogInfo}, crate::source_info::InMemoryInfo, common_daft_config::PyDaftPlanningConfig, - daft_core::python::schema::PySchema, daft_dsl::python::PyExpr, daft_scan::python::pylib::ScanOperatorHandle, + daft_schema::python::schema::PySchema, pyo3::prelude::*, }; diff --git a/src/daft-plan/src/logical_ops/actor_pool_project.rs b/src/daft-plan/src/logical_ops/actor_pool_project.rs index 524742afa6..b353636240 100644 --- a/src/daft-plan/src/logical_ops/actor_pool_project.rs +++ b/src/daft-plan/src/logical_ops/actor_pool_project.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use common_error::DaftError; use common_resource_request::ResourceRequest; use common_treenode::TreeNode; -use daft_core::schema::{Schema, SchemaRef}; use daft_dsl::{ functions::{ python::{get_concurrency, get_resource_request, PythonUDF, StatefulPythonUDF}, @@ -11,6 +10,7 @@ use daft_dsl::{ }, resolve_exprs, Expr, ExprRef, }; +use daft_schema::schema::{Schema, SchemaRef}; use itertools::Itertools; use snafu::ResultExt; diff --git a/src/daft-plan/src/logical_ops/agg.rs b/src/daft-plan/src/logical_ops/agg.rs index f74848fe7f..0512a33347 100644 --- a/src/daft-plan/src/logical_ops/agg.rs +++ b/src/daft-plan/src/logical_ops/agg.rs @@ -3,8 +3,8 @@ use std::sync::Arc; use itertools::Itertools; use snafu::ResultExt; -use daft_core::schema::{Schema, SchemaRef}; use daft_dsl::{resolve_aggexprs, resolve_exprs, AggExpr, ExprRef}; +use daft_schema::schema::{Schema, SchemaRef}; use crate::logical_plan::{self, CreationSnafu}; use crate::LogicalPlan; diff --git a/src/daft-plan/src/logical_ops/explode.rs b/src/daft-plan/src/logical_ops/explode.rs index 16cb846cf1..f779f58871 100644 --- a/src/daft-plan/src/logical_ops/explode.rs +++ b/src/daft-plan/src/logical_ops/explode.rs @@ -1,7 +1,7 @@ use std::sync::Arc; -use daft_core::schema::{Schema, SchemaRef}; use daft_dsl::{resolve_exprs, ExprRef}; +use daft_schema::schema::{Schema, SchemaRef}; use itertools::Itertools; use snafu::ResultExt; diff --git a/src/daft-plan/src/logical_ops/pivot.rs b/src/daft-plan/src/logical_ops/pivot.rs index 20208859c3..0809cc9023 100644 --- a/src/daft-plan/src/logical_ops/pivot.rs +++ b/src/daft-plan/src/logical_ops/pivot.rs @@ -4,8 +4,8 @@ use daft_core::prelude::*; use itertools::Itertools; use snafu::ResultExt; -use daft_core::schema::{Schema, SchemaRef}; use daft_dsl::{resolve_exprs, resolve_single_aggexpr, resolve_single_expr, AggExpr, ExprRef}; +use daft_schema::schema::{Schema, SchemaRef}; use crate::logical_plan::{self, CreationSnafu}; use crate::LogicalPlan; diff --git a/src/daft-plan/src/logical_ops/source.rs b/src/daft-plan/src/logical_ops/source.rs index 0b1e652bce..3d9d88ad89 100644 --- a/src/daft-plan/src/logical_ops/source.rs +++ b/src/daft-plan/src/logical_ops/source.rs @@ -1,7 +1,7 @@ use std::sync::Arc; -use daft_core::schema::SchemaRef; use daft_scan::PhysicalScanInfo; +use daft_schema::schema::SchemaRef; use crate::source_info::SourceInfo; diff --git a/src/daft-plan/src/logical_plan.rs b/src/daft-plan/src/logical_plan.rs index 
4c7448db58..e59e8a03a2 100644 --- a/src/daft-plan/src/logical_plan.rs +++ b/src/daft-plan/src/logical_plan.rs @@ -1,8 +1,8 @@ use std::{num::NonZeroUsize, sync::Arc}; use common_error::DaftError; -use daft_core::schema::SchemaRef; use daft_dsl::optimization::get_required_columns; +use daft_schema::schema::SchemaRef; use indexmap::IndexSet; use snafu::Snafu; diff --git a/src/daft-plan/src/physical_ops/csv.rs b/src/daft-plan/src/physical_ops/csv.rs index 797bfbae3d..a1859d9be0 100644 --- a/src/daft-plan/src/physical_ops/csv.rs +++ b/src/daft-plan/src/physical_ops/csv.rs @@ -1,4 +1,4 @@ -use daft_core::schema::SchemaRef; +use daft_schema::schema::SchemaRef; use crate::{physical_plan::PhysicalPlanRef, sink_info::OutputFileInfo}; use serde::{Deserialize, Serialize}; diff --git a/src/daft-plan/src/physical_ops/deltalake_write.rs b/src/daft-plan/src/physical_ops/deltalake_write.rs index 7c6c43eb0c..60844806c7 100644 --- a/src/daft-plan/src/physical_ops/deltalake_write.rs +++ b/src/daft-plan/src/physical_ops/deltalake_write.rs @@ -1,4 +1,4 @@ -use daft_core::schema::SchemaRef; +use daft_schema::schema::SchemaRef; use crate::{physical_plan::PhysicalPlanRef, sink_info::DeltaLakeCatalogInfo}; use serde::{Deserialize, Serialize}; diff --git a/src/daft-plan/src/physical_ops/empty_scan.rs b/src/daft-plan/src/physical_ops/empty_scan.rs index 7884ba10f2..191b7f3b2c 100644 --- a/src/daft-plan/src/physical_ops/empty_scan.rs +++ b/src/daft-plan/src/physical_ops/empty_scan.rs @@ -1,6 +1,6 @@ use crate::ClusteringSpec; use common_display::tree::TreeDisplay; -use daft_core::schema::SchemaRef; +use daft_schema::schema::SchemaRef; use serde::{Deserialize, Serialize}; use std::sync::Arc; diff --git a/src/daft-plan/src/physical_ops/iceberg_write.rs b/src/daft-plan/src/physical_ops/iceberg_write.rs index 82a357e33f..d2c6086cd1 100644 --- a/src/daft-plan/src/physical_ops/iceberg_write.rs +++ b/src/daft-plan/src/physical_ops/iceberg_write.rs @@ -1,4 +1,4 @@ -use daft_core::schema::SchemaRef; +use daft_schema::schema::SchemaRef; use crate::{physical_plan::PhysicalPlanRef, sink_info::IcebergCatalogInfo}; use serde::{Deserialize, Serialize}; diff --git a/src/daft-plan/src/physical_ops/in_memory.rs b/src/daft-plan/src/physical_ops/in_memory.rs index 3f0bc7bb17..c4fc7d0bd4 100644 --- a/src/daft-plan/src/physical_ops/in_memory.rs +++ b/src/daft-plan/src/physical_ops/in_memory.rs @@ -1,6 +1,6 @@ use crate::{source_info::InMemoryInfo, ClusteringSpec}; use common_display::{tree::TreeDisplay, DisplayLevel}; -use daft_core::schema::SchemaRef; +use daft_schema::schema::SchemaRef; use serde::{Deserialize, Serialize}; use std::sync::Arc; diff --git a/src/daft-plan/src/physical_ops/json.rs b/src/daft-plan/src/physical_ops/json.rs index f0c307c9a9..299bf17108 100644 --- a/src/daft-plan/src/physical_ops/json.rs +++ b/src/daft-plan/src/physical_ops/json.rs @@ -1,4 +1,4 @@ -use daft_core::schema::SchemaRef; +use daft_schema::schema::SchemaRef; use crate::{physical_plan::PhysicalPlanRef, sink_info::OutputFileInfo}; use serde::{Deserialize, Serialize}; diff --git a/src/daft-plan/src/physical_ops/lance_write.rs b/src/daft-plan/src/physical_ops/lance_write.rs index 685d181d0d..0b88130ede 100644 --- a/src/daft-plan/src/physical_ops/lance_write.rs +++ b/src/daft-plan/src/physical_ops/lance_write.rs @@ -1,4 +1,4 @@ -use daft_core::schema::SchemaRef; +use daft_schema::schema::SchemaRef; use crate::{physical_plan::PhysicalPlanRef, sink_info::LanceCatalogInfo}; use serde::{Deserialize, Serialize}; diff --git 
a/src/daft-plan/src/physical_ops/parquet.rs b/src/daft-plan/src/physical_ops/parquet.rs index e4fa6df680..bf9629cea5 100644 --- a/src/daft-plan/src/physical_ops/parquet.rs +++ b/src/daft-plan/src/physical_ops/parquet.rs @@ -1,4 +1,4 @@ -use daft_core::schema::SchemaRef; +use daft_schema::schema::SchemaRef; use crate::{physical_plan::PhysicalPlanRef, sink_info::OutputFileInfo}; use serde::{Deserialize, Serialize}; diff --git a/src/daft-plan/src/source_info/mod.rs b/src/daft-plan/src/source_info/mod.rs index f1b31e5ab3..8ee8aef5ca 100644 --- a/src/daft-plan/src/source_info/mod.rs +++ b/src/daft-plan/src/source_info/mod.rs @@ -1,6 +1,6 @@ pub mod file_info; -use daft_core::schema::SchemaRef; use daft_scan::PhysicalScanInfo; +use daft_schema::schema::SchemaRef; pub use file_info::{FileInfo, FileInfos}; use serde::{Deserialize, Serialize}; use std::hash::Hash; diff --git a/src/daft-plan/src/test/mod.rs b/src/daft-plan/src/test/mod.rs index 1b67812efb..5922ef246b 100644 --- a/src/daft-plan/src/test/mod.rs +++ b/src/daft-plan/src/test/mod.rs @@ -1,10 +1,10 @@ use std::sync::Arc; -use daft_core::{datatypes::Field, schema::Schema}; use daft_scan::{ file_format::FileFormatConfig, storage_config::NativeStorageConfig, storage_config::StorageConfig, AnonymousScanOperator, Pushdowns, ScanOperator, }; +use daft_schema::{field::Field, schema::Schema}; use crate::builder::LogicalPlanBuilder; diff --git a/src/daft-scan/Cargo.toml b/src/daft-scan/Cargo.toml index 19637aa7ca..656e5a17b6 100644 --- a/src/daft-scan/Cargo.toml +++ b/src/daft-scan/Cargo.toml @@ -10,6 +10,7 @@ daft-dsl = {path = "../daft-dsl", default-features = false} daft-io = {path = "../daft-io", default-features = false} daft-json = {path = "../daft-json", default-features = false} daft-parquet = {path = "../daft-parquet", default-features = false} +daft-schema = {path = "../daft-schema", default-features = false} daft-stats = {path = "../daft-stats", default-features = false} daft-table = {path = "../daft-table", default-features = false} futures = {workspace = true} @@ -21,7 +22,7 @@ serde_json = {workspace = true} snafu = {workspace = true} [features] -python = ["dep:pyo3", "common-error/python", "daft-core/python", "daft-dsl/python", "daft-table/python", "daft-stats/python", "common-io-config/python", "common-daft-config/python"] +python = ["dep:pyo3", "common-error/python", "daft-core/python", "daft-dsl/python", "daft-table/python", "daft-stats/python", "common-io-config/python", "common-daft-config/python", "daft-schema/python"] [package] edition = {workspace = true} diff --git a/src/daft-scan/src/anonymous.rs b/src/daft-scan/src/anonymous.rs index 1b9ece871b..d4869048a6 100644 --- a/src/daft-scan/src/anonymous.rs +++ b/src/daft-scan/src/anonymous.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use common_error::DaftResult; -use daft_core::schema::SchemaRef; +use daft_schema::schema::SchemaRef; use crate::{ file_format::{FileFormatConfig, ParquetSourceConfig}, diff --git a/src/daft-scan/src/file_format.rs b/src/daft-scan/src/file_format.rs index c0daa61ca7..91b47be6ae 100644 --- a/src/daft-scan/src/file_format.rs +++ b/src/daft-scan/src/file_format.rs @@ -9,7 +9,7 @@ use common_py_serde::impl_bincode_py_state_serialization; #[cfg(feature = "python")] use { common_py_serde::{deserialize_py_object, serialize_py_object}, - daft_core::python::{datatype::PyTimeUnit, field::PyField}, + daft_schema::python::{datatype::PyTimeUnit, field::PyField}, pyo3::{pyclass, pyclass::CompareOp, pymethods, IntoPy, PyObject, PyResult, Python}, }; diff --git 
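The physical-op hunks above swap `daft_core::schema::SchemaRef` for `daft_schema::schema::SchemaRef` without touching any struct bodies or call sites. A small sketch of why that is expected to be a pure import rename, under the assumption that daft-core now re-exports daft-schema's schema types through its prelude, so both paths resolve to the same `Arc<Schema>` alias; `Schema::new` is the constructor used elsewhere in this diff.

use std::sync::Arc;

use common_error::DaftResult;
use daft_core::prelude::SchemaRef as CoreSchemaRef;
use daft_schema::schema::{Schema, SchemaRef as SchemaCrateRef};

// Under the re-export assumption this is an identity function: it compiles only
// if both aliases name the same Arc<Schema>, which is why the planner code
// needed nothing beyond the import swap.
fn same_alias(schema: SchemaCrateRef) -> CoreSchemaRef {
    schema
}

fn main() -> DaftResult<()> {
    let empty: SchemaCrateRef = Arc::new(Schema::new(vec![])?);
    let _core: CoreSchemaRef = same_alias(empty);
    Ok(())
}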
a/src/daft-scan/src/glob.rs b/src/daft-scan/src/glob.rs index af60a0e0b0..1151c41c9d 100644 --- a/src/daft-scan/src/glob.rs +++ b/src/daft-scan/src/glob.rs @@ -1,12 +1,12 @@ use std::{sync::Arc, vec}; use common_error::{DaftError, DaftResult}; -use daft_core::schema::SchemaRef; use daft_csv::CsvParseOptions; use daft_io::{ parse_url, FileFormat, FileMetadata, IOClient, IOStatsContext, IOStatsRef, RuntimeRef, }; use daft_parquet::read::ParquetSchemaInferenceOptions; +use daft_schema::schema::SchemaRef; use futures::{stream::BoxStream, StreamExt, TryStreamExt}; use snafu::Snafu; diff --git a/src/daft-scan/src/lib.rs b/src/daft-scan/src/lib.rs index 98d557463d..bee62535b0 100644 --- a/src/daft-scan/src/lib.rs +++ b/src/daft-scan/src/lib.rs @@ -9,11 +9,11 @@ use std::{ use common_display::DisplayAs; use common_error::{DaftError, DaftResult}; -use daft_core::{ - datatypes::Field, +use daft_dsl::ExprRef; +use daft_schema::{ + field::Field, schema::{Schema, SchemaRef}, }; -use daft_dsl::ExprRef; use daft_stats::{PartitionSpec, TableMetadata, TableStatistics}; use file_format::FileFormatConfig; use itertools::Itertools; @@ -962,7 +962,7 @@ mod test { use common_display::{DisplayAs, DisplayLevel}; use common_error::DaftResult; - use daft_core::{datatypes::TimeUnit, schema::Schema}; + use daft_schema::{schema::Schema, time_unit::TimeUnit}; use itertools::Itertools; use crate::{ diff --git a/src/daft-scan/src/python.rs b/src/daft-scan/src/python.rs index b4682efb91..4086ec9800 100644 --- a/src/daft-scan/src/python.rs +++ b/src/daft-scan/src/python.rs @@ -41,9 +41,9 @@ impl PartialEq for PythonTablesFactoryArgs { pub mod pylib { use common_error::DaftResult; use common_py_serde::impl_bincode_py_state_serialization; - use daft_core::python::field::PyField; - use daft_core::schema::SchemaRef; use daft_dsl::python::PyExpr; + use daft_schema::python::field::PyField; + use daft_schema::schema::SchemaRef; use daft_stats::PartitionSpec; use daft_stats::TableMetadata; @@ -56,7 +56,7 @@ pub mod pylib { use pyo3::types::PyList; use std::sync::Arc; - use daft_core::python::schema::PySchema; + use daft_schema::python::schema::PySchema; use pyo3::pyclass; use serde::{Deserialize, Serialize}; @@ -220,7 +220,7 @@ pub mod pylib { fn partitioning_keys(&self) -> &[crate::PartitionField] { &self.partitioning_keys } - fn schema(&self) -> daft_core::schema::SchemaRef { + fn schema(&self) -> daft_schema::schema::SchemaRef { self.schema.clone() } fn can_absorb_filter(&self) -> bool { diff --git a/src/daft-scheduler/src/adaptive.rs b/src/daft-scheduler/src/adaptive.rs index b3cf84a919..ae5b1267c0 100644 --- a/src/daft-scheduler/src/adaptive.rs +++ b/src/daft-scheduler/src/adaptive.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use common_daft_config::DaftExecutionConfig; -use daft_core::schema::Schema; +use daft_core::prelude::Schema; use crate::PhysicalPlanScheduler; use daft_plan::InMemoryInfo; diff --git a/src/daft-scheduler/src/scheduler.rs b/src/daft-scheduler/src/scheduler.rs index 325fce2533..5dd24abae5 100644 --- a/src/daft-scheduler/src/scheduler.rs +++ b/src/daft-scheduler/src/scheduler.rs @@ -9,8 +9,8 @@ use serde::{Deserialize, Serialize}; use { common_daft_config::PyDaftExecutionConfig, common_io_config::IOConfig, - daft_core::python::schema::PySchema, - daft_core::schema::SchemaRef, + daft_core::prelude::SchemaRef, + daft_core::python::PySchema, daft_dsl::python::PyExpr, daft_dsl::Expr, daft_io::FileFormat, diff --git a/src/daft-schema/Cargo.toml b/src/daft-schema/Cargo.toml new file mode 100644 index 
0000000000..612505e385 --- /dev/null +++ b/src/daft-schema/Cargo.toml @@ -0,0 +1,31 @@ +[dependencies] +arrow2 = {workspace = true} +common-arrow-ffi = {path = "../common/arrow-ffi", default-features = false} +common-display = {path = "../common/display", default-features = false} +common-error = {path = "../common/error", default-features = false} +common-py-serde = {path = "../common/py-serde", default-features = false} +common-version = {path = "../common/version", default-features = false} +html-escape = {workspace = true} +indexmap = {workspace = true} +num-derive = {workspace = true} +num-traits = {workspace = true} +pyo3 = {workspace = true, optional = true} +serde = {workspace = true, features = ["rc"]} +serde_json = {workspace = true} + +[features] +python = [ + "dep:pyo3", + "common-error/python", + "common-display/python", + "common-py-serde/python", + "common-arrow-ffi/python" +] + +[package] +edition = {workspace = true} +name = "daft-schema" +version = {workspace = true} + +[package.metadata.cargo-machete] +ignored = ["num_traits"] # needed by num-derive diff --git a/src/daft-core/src/datatypes/dtype.rs b/src/daft-schema/src/dtype.rs similarity index 99% rename from src/daft-core/src/datatypes/dtype.rs rename to src/daft-schema/src/dtype.rs index 140ffd45d6..c73a240efd 100644 --- a/src/daft-core/src/datatypes/dtype.rs +++ b/src/daft-schema/src/dtype.rs @@ -2,7 +2,7 @@ use std::fmt::{Display, Formatter, Result}; use arrow2::datatypes::DataType as ArrowType; -use crate::datatypes::{field::Field, image_mode::ImageMode, time_unit::TimeUnit}; +use crate::{field::Field, image_mode::ImageMode, time_unit::TimeUnit}; use common_error::{DaftError, DaftResult}; @@ -110,8 +110,8 @@ impl DataTypePayload { pub fn new(datatype: &DataType) -> Self { DataTypePayload { datatype: datatype.clone(), - daft_version: crate::VERSION.into(), - daft_build_type: crate::DAFT_BUILD_TYPE.into(), + daft_version: common_version::VERSION.into(), + daft_build_type: common_version::DAFT_BUILD_TYPE.into(), } } } diff --git a/src/daft-core/src/datatypes/field.rs b/src/daft-schema/src/field.rs similarity index 99% rename from src/daft-core/src/datatypes/field.rs rename to src/daft-schema/src/field.rs index d9cc37d09b..219127c416 100644 --- a/src/daft-core/src/datatypes/field.rs +++ b/src/daft-schema/src/field.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use arrow2::datatypes::Field as ArrowField; -use crate::datatypes::dtype::DataType; +use crate::dtype::DataType; use common_error::{DaftError, DaftResult}; use serde::{Deserialize, Serialize}; diff --git a/src/daft-core/src/datatypes/image_format.rs b/src/daft-schema/src/image_format.rs similarity index 69% rename from src/daft-core/src/datatypes/image_format.rs rename to src/daft-schema/src/image_format.rs index c16b4b1043..1a622d25a1 100644 --- a/src/daft-core/src/datatypes/image_format.rs +++ b/src/daft-schema/src/image_format.rs @@ -68,31 +68,6 @@ impl FromStr for ImageFormat { } } -impl From for ImageFormat { - fn from(image_format: image::ImageFormat) -> Self { - match image_format { - image::ImageFormat::Png => ImageFormat::PNG, - image::ImageFormat::Jpeg => ImageFormat::JPEG, - image::ImageFormat::Tiff => ImageFormat::TIFF, - image::ImageFormat::Gif => ImageFormat::GIF, - image::ImageFormat::Bmp => ImageFormat::BMP, - _ => unimplemented!("Image format {:?} is not supported", image_format), - } - } -} - -impl From for image::ImageFormat { - fn from(image_format: ImageFormat) -> Self { - match image_format { - ImageFormat::PNG => image::ImageFormat::Png, - 
ImageFormat::JPEG => image::ImageFormat::Jpeg, - ImageFormat::TIFF => image::ImageFormat::Tiff, - ImageFormat::GIF => image::ImageFormat::Gif, - ImageFormat::BMP => image::ImageFormat::Bmp, - } - } -} - impl Display for ImageFormat { fn fmt(&self, f: &mut Formatter) -> Result { // Leverage Debug trait implementation, which will already return the enum variant as a string. diff --git a/src/daft-core/src/datatypes/image_mode.rs b/src/daft-schema/src/image_mode.rs similarity index 78% rename from src/daft-core/src/datatypes/image_mode.rs rename to src/daft-schema/src/image_mode.rs index b6ee7afd66..6f69d183ee 100644 --- a/src/daft-core/src/datatypes/image_mode.rs +++ b/src/daft-schema/src/image_mode.rs @@ -1,4 +1,4 @@ -use crate::datatypes::DataType; +use crate::dtype::DataType; use num_derive::FromPrimitive; #[cfg(feature = "python")] use pyo3::{exceptions::PyValueError, prelude::*}; @@ -123,52 +123,6 @@ impl ImageMode { } } -impl From for image::ColorType { - fn from(image_mode: ImageMode) -> image::ColorType { - use image::ColorType; - use ImageMode::*; - - match image_mode { - L => ColorType::L8, - LA => ColorType::La8, - RGB => ColorType::Rgb8, - RGBA => ColorType::Rgba8, - L16 => ColorType::L16, - LA16 => ColorType::La16, - RGB16 => ColorType::Rgb16, - RGBA16 => ColorType::Rgba16, - RGB32F => ColorType::Rgb32F, - RGBA32F => ColorType::Rgba32F, - } - } -} - -impl TryFrom for ImageMode { - type Error = DaftError; - - fn try_from(color: image::ColorType) -> DaftResult { - use image::ColorType; - use ImageMode::*; - - match color { - ColorType::L8 => Ok(L), - ColorType::La8 => Ok(LA), - ColorType::Rgb8 => Ok(RGB), - ColorType::Rgba8 => Ok(RGBA), - ColorType::L16 => Ok(L16), - ColorType::La16 => Ok(LA16), - ColorType::Rgb16 => Ok(RGB16), - ColorType::Rgba16 => Ok(RGBA16), - ColorType::Rgb32F => Ok(RGB32F), - ColorType::Rgba32F => Ok(RGBA32F), - _ => Err(DaftError::ValueError(format!( - "Color type {:?} is not supported.", - color - ))), - } - } -} - impl FromStr for ImageMode { type Err = DaftError; diff --git a/src/daft-schema/src/lib.rs b/src/daft-schema/src/lib.rs new file mode 100644 index 0000000000..60eec21e62 --- /dev/null +++ b/src/daft-schema/src/lib.rs @@ -0,0 +1,10 @@ +pub mod dtype; +pub mod field; +pub mod image_format; +pub mod image_mode; +pub mod prelude; + +#[cfg(feature = "python")] +pub mod python; +pub mod schema; +pub mod time_unit; diff --git a/src/daft-schema/src/prelude.rs b/src/daft-schema/src/prelude.rs new file mode 100644 index 0000000000..442ef37f62 --- /dev/null +++ b/src/daft-schema/src/prelude.rs @@ -0,0 +1,11 @@ +pub use crate::dtype::DataType; + +pub use crate::field::{Field, FieldID, FieldRef}; + +pub use crate::image_format::ImageFormat; + +pub use crate::image_mode::ImageMode; + +pub use crate::schema::{Schema, SchemaRef}; + +pub use crate::time_unit::{infer_timeunit_from_format_string, TimeUnit}; diff --git a/src/daft-core/src/python/datatype.rs b/src/daft-schema/src/python/datatype.rs similarity index 98% rename from src/daft-core/src/python/datatype.rs rename to src/daft-schema/src/python/datatype.rs index e5615bd072..42990c6f0b 100644 --- a/src/daft-core/src/python/datatype.rs +++ b/src/daft-schema/src/python/datatype.rs @@ -1,7 +1,8 @@ -use crate::{ - datatypes::{DataType, Field, ImageMode, TimeUnit}, - ffi, -}; +use crate::dtype::DataType; +use crate::field::Field; +use crate::image_mode::ImageMode; + +use common_arrow_ffi as ffi; use common_py_serde::impl_bincode_py_state_serialization; use pyo3::{ @@ -12,6 +13,8 @@ use pyo3::{ }; use 
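With the new daft-schema crate laid out above (lib.rs plus a prelude), downstream crates can build schemas without depending on daft-core at all. A minimal usage sketch based on that prelude; `Field::new` and `Schema::new` appear elsewhere in this diff, while the specific `DataType` variants chosen here are assumptions for illustration.

use daft_schema::prelude::*;

fn main() -> common_error::DaftResult<()> {
    // Illustrative column definitions; only the constructors are taken from the diff.
    let fields = vec![
        Field::new("id", DataType::Int64),
        Field::new("name", DataType::Utf8),
    ];
    let schema = Schema::new(fields)?;
    println!("{schema}");
    Ok(())
}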
serde::{Deserialize, Serialize}; +use crate::time_unit::TimeUnit; + #[pyclass] #[derive(Clone)] pub struct PyTimeUnit { diff --git a/src/daft-core/src/python/field.rs b/src/daft-schema/src/python/field.rs similarity index 76% rename from src/daft-core/src/python/field.rs rename to src/daft-schema/src/python/field.rs index e6e54491aa..214a762dec 100644 --- a/src/daft-core/src/python/field.rs +++ b/src/daft-schema/src/python/field.rs @@ -2,20 +2,20 @@ use pyo3::prelude::*; use serde::{Deserialize, Serialize}; use super::datatype::PyDataType; -use crate::datatypes; +use crate::field::Field; use common_py_serde::impl_bincode_py_state_serialization; #[pyclass(module = "daft.daft")] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PyField { - pub field: datatypes::Field, + pub field: Field, } #[pymethods] impl PyField { #[staticmethod] pub fn create(name: &str, data_type: PyDataType) -> PyResult { - Ok(datatypes::Field::new(name, data_type.dtype).into()) + Ok(Field::new(name, data_type.dtype).into()) } pub fn name(&self) -> PyResult { @@ -33,13 +33,13 @@ impl PyField { impl_bincode_py_state_serialization!(PyField); -impl From for PyField { - fn from(field: datatypes::Field) -> Self { +impl From for PyField { + fn from(field: Field) -> Self { PyField { field } } } -impl From for datatypes::Field { +impl From for Field { fn from(item: PyField) -> Self { item.field } diff --git a/src/daft-schema/src/python/mod.rs b/src/daft-schema/src/python/mod.rs new file mode 100644 index 0000000000..3241eaa90c --- /dev/null +++ b/src/daft-schema/src/python/mod.rs @@ -0,0 +1,20 @@ +use pyo3::prelude::*; +pub mod datatype; +pub mod field; +pub mod schema; + +use crate::image_format::ImageFormat; +use crate::image_mode::ImageMode; +pub use datatype::PyDataType; +pub use datatype::PyTimeUnit; + +pub fn register_modules(_py: Python, parent: &PyModule) -> PyResult<()> { + parent.add_class::()?; + parent.add_class::()?; + parent.add_class::()?; + parent.add_class::()?; + parent.add_class::()?; + parent.add_class::()?; + + Ok(()) +} diff --git a/src/daft-core/src/python/schema.rs b/src/daft-schema/src/python/schema.rs similarity index 96% rename from src/daft-core/src/python/schema.rs rename to src/daft-schema/src/python/schema.rs index eaa2ff78ac..533867c7f6 100644 --- a/src/daft-core/src/python/schema.rs +++ b/src/daft-schema/src/python/schema.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use super::datatype::PyDataType; use super::field::PyField; -use crate::datatypes; +use crate::field::Field; use crate::schema; use common_py_serde::impl_bincode_py_state_serialization; @@ -68,7 +68,7 @@ impl PySchema { ) -> PyResult { let fields = names_and_types .iter() - .map(|(name, pydtype)| datatypes::Field::new(name, pydtype.clone().into())) + .map(|(name, pydtype)| Field::new(name, pydtype.clone().into())) .collect(); let schema = schema::Schema::new(fields)?; Ok(PySchema { diff --git a/src/daft-core/src/schema.rs b/src/daft-schema/src/schema.rs similarity index 94% rename from src/daft-core/src/schema.rs rename to src/daft-schema/src/schema.rs index 135605b264..b3522633e4 100644 --- a/src/daft-core/src/schema.rs +++ b/src/daft-schema/src/schema.rs @@ -1,19 +1,18 @@ use std::{ - borrow::Cow, collections::{hash_map::DefaultHasher, HashSet}, fmt::{Display, Formatter, Result}, hash::{Hash, Hasher}, sync::Arc, }; -use common_display::DisplayAs; +use common_display::{ + table_display::{make_comfy_table, make_schema_vertical_table}, + DisplayAs, +}; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; 
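The `register_modules` function added in daft-schema's python/mod.rs has its generic arguments elided in this rendering of the diff; presumably the six `add_class` calls register the PyO3 wrappers owned by this crate (PyDataType, PyTimeUnit, PyField, PySchema, ImageMode, ImageFormat), though that exact list is an assumption. The self-contained sketch below shows only the registration pattern itself, with a toy class standing in for those wrappers.

use pyo3::prelude::*;

// Toy wrapper standing in for PyDataType / PySchema and friends; not a Daft type.
#[pyclass]
struct PyExampleType {
    #[pyo3(get)]
    name: String,
}

// Same shape as the register_modules added above: the parent module is handed in
// and each #[pyclass] wrapper is attached to it.
pub fn register_modules(_py: Python, parent: &PyModule) -> PyResult<()> {
    parent.add_class::<PyExampleType>()?;
    Ok(())
}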
-use crate::{ - datatypes::Field, - utils::display_table::{make_comfy_table, make_schema_vertical_table}, -}; +use crate::field::Field; use common_error::{DaftError, DaftResult}; @@ -219,11 +218,12 @@ impl Schema { let table = make_comfy_table( self.fields .values() - .map(Cow::Borrowed) + .map(|field| format!("{}\n---\n{}", field.name, field.dtype)) .collect::>() .as_slice(), None, None, + None, ); format!("{}\n", table) } @@ -291,13 +291,17 @@ impl Default for Schema { impl Display for Schema { // Produces an ASCII table. fn fmt(&self, f: &mut Formatter) -> Result { - let table = make_schema_vertical_table( - self.fields - .values() - .map(Cow::Borrowed) - .collect::>() - .as_slice(), - ); + let names = self + .fields + .values() + .map(|f| f.name.clone()) + .collect::>(); + let dtypes = self + .fields + .values() + .map(|f| format!("{}", f.dtype)) + .collect::>(); + let table = make_schema_vertical_table(&names, &dtypes); writeln!(f, "{table}") } } diff --git a/src/daft-core/src/datatypes/time_unit.rs b/src/daft-schema/src/time_unit.rs similarity index 100% rename from src/daft-core/src/datatypes/time_unit.rs rename to src/daft-schema/src/time_unit.rs diff --git a/src/daft-table/Cargo.toml b/src/daft-table/Cargo.toml index 17429f260a..0b42463974 100644 --- a/src/daft-table/Cargo.toml +++ b/src/daft-table/Cargo.toml @@ -1,6 +1,8 @@ [dependencies] arrow2 = {workspace = true} comfy-table = {workspace = true} +common-arrow-ffi = {path = "../common/arrow-ffi", default-features = false} +common-display = {path = "../common/display", default-features = false} common-error = {path = "../common/error", default-features = false} daft-core = {path = "../daft-core", default-features = false} daft-dsl = {path = "../daft-dsl", default-features = false} @@ -11,7 +13,7 @@ rand = {workspace = true} serde = {workspace = true} [features] -python = ["dep:pyo3", "common-error/python", "daft-core/python", "daft-dsl/python"] +python = ["dep:pyo3", "common-error/python", "daft-core/python", "daft-dsl/python", "common-arrow-ffi/python", "common-display/python"] [package] edition = {workspace = true} diff --git a/src/daft-table/src/ffi.rs b/src/daft-table/src/ffi.rs index 8554cc997e..a313cd4b9d 100644 --- a/src/daft-table/src/ffi.rs +++ b/src/daft-table/src/ffi.rs @@ -6,7 +6,7 @@ use pyo3::types::PyList; use crate::Table; use common_error::DaftResult; use daft_core::{ - schema::SchemaRef, + prelude::SchemaRef, series::Series, utils::arrow::{cast_array_for_daft_if_needed, cast_array_from_daft_if_needed}, }; @@ -31,7 +31,7 @@ pub fn record_batches_to_table( let columns = pycolumns .downcast::()? .into_iter() - .map(daft_core::ffi::array_to_rust) + .map(common_arrow_ffi::array_to_rust) .collect::>>()?; if names.len() != columns.len() { return Err(PyValueError::new_err(format!("Error when converting Arrow Record Batches to Daft Table. 
Expected: {} columns, got: {}", names.len(), columns.len()))); @@ -64,7 +64,7 @@ pub fn table_to_record_batch(table: &Table, py: Python, pyarrow: &PyModule) -> P let s = table.get_column_by_index(i)?; let arrow_array = s.to_arrow(); let arrow_array = cast_array_from_daft_if_needed(arrow_array.to_boxed()); - let py_array = daft_core::ffi::to_py_array(arrow_array, py, pyarrow)?; + let py_array = common_arrow_ffi::to_py_array(arrow_array, py, pyarrow)?; arrays.push(py_array); names.push(s.name().to_string()); } diff --git a/src/daft-table/src/lib.rs b/src/daft-table/src/lib.rs index c190171d4a..02052109a6 100644 --- a/src/daft-table/src/lib.rs +++ b/src/daft-table/src/lib.rs @@ -2,12 +2,11 @@ #![feature(let_chains)] use core::slice; -use std::borrow::Cow; use std::collections::{HashMap, HashSet}; use std::fmt::{Display, Formatter, Result}; +use common_display::table_display::{make_comfy_table, StrValue}; use daft_core::array::ops::full::FullNull; -use daft_core::utils::display_table::make_comfy_table; use num_traits::ToPrimitive; use daft_core::array::ops::{DaftApproxCountDistinctAggable, DaftHllSketchAggable, GroupIndices}; @@ -751,14 +750,21 @@ impl Table { } pub fn to_comfy_table(&self, max_col_width: Option) -> comfy_table::Table { + let str_values = self + .columns + .iter() + .map(|s| s as &dyn StrValue) + .collect::>(); + make_comfy_table( self.schema .fields .values() - .map(Cow::Borrowed) + .map(|field| format!("{}\n---\n{}", field.name, field.dtype)) .collect::>() .as_slice(), - Some(self.columns.iter().collect::>().as_slice()), + Some(str_values.as_slice()), + Some(self.len()), max_col_width, ) } diff --git a/src/daft-table/src/ops/agg.rs b/src/daft-table/src/ops/agg.rs index d31588747a..51dd7d2587 100644 --- a/src/daft-table/src/ops/agg.rs +++ b/src/daft-table/src/ops/agg.rs @@ -73,7 +73,7 @@ impl Table { inputs: &[ExprRef], group_by: &[ExprRef], ) -> DaftResult { - use daft_core::{array::ops::IntoGroups, schema::Schema}; + use daft_core::array::ops::IntoGroups; use daft_dsl::functions::python::PythonUDF; let udf = match func { diff --git a/src/daft-table/src/probe_table/mod.rs b/src/daft-table/src/probe_table/mod.rs index 29953ffd10..ff985a1051 100644 --- a/src/daft-table/src/probe_table/mod.rs +++ b/src/daft-table/src/probe_table/mod.rs @@ -5,7 +5,7 @@ use common_error::DaftResult; use daft_core::utils::dyn_compare::{build_dyn_multi_array_compare, MultiDynArrayComparator}; use daft_core::utils::identity_hash_set::IdentityBuildHasher; -use daft_core::{array::ops::as_arrow::AsArrow, schema::SchemaRef}; +use daft_core::{array::ops::as_arrow::AsArrow, prelude::SchemaRef}; use crate::{ops::hash::IndexHash, Table}; diff --git a/src/daft-table/src/python.rs b/src/daft-table/src/python.rs index 83afd28967..0c39ea2c40 100644 --- a/src/daft-table/src/python.rs +++ b/src/daft-table/src/python.rs @@ -9,8 +9,8 @@ use common_error::DaftError; use daft_core::prelude::*; use daft_dsl::python::PyExpr; -use daft_core::python::schema::PySchema; use daft_core::python::series::PySeries; +use daft_core::python::PySchema; #[pyclass] #[derive(Clone)] diff --git a/src/lib.rs b/src/lib.rs index 12e926f2e3..bdc3b0b2c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,12 +55,12 @@ pub mod pylib { #[pyfunction] pub fn version() -> &'static str { - daft_core::VERSION + common_version::VERSION } #[pyfunction] pub fn build_type() -> &'static str { - daft_core::DAFT_BUILD_TYPE + common_version::DAFT_BUILD_TYPE } #[pyfunction]
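The `version()` and `build_type()` pyfunctions above now read their strings from the new common-version crate instead of daft-core. A hedged sketch of what such a crate's lib.rs typically looks like, assuming it resolves both values at compile time from Cargo metadata and an optional build-time environment variable; the variable name used here is hypothetical, not taken from this diff.

// Hypothetical common-version-style constants, resolved at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");

// The real crate would get its build type from the build environment; the env
// var name and "dev" fallback below are assumptions for illustration only.
pub const DAFT_BUILD_TYPE: &str = match option_env!("RUST_DAFT_PKG_BUILD_TYPE") {
    Some(build_type) => build_type,
    None => "dev",
};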