Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEAT] 1606 - Adding hour expression in date util #1637

Merged
merged 2 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions daft/daft.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,7 @@ class PyExpr:
def is_nan(self) -> PyExpr: ...
def dt_date(self) -> PyExpr: ...
def dt_day(self) -> PyExpr: ...
def dt_hour(self) -> PyExpr: ...
def dt_month(self) -> PyExpr: ...
def dt_year(self) -> PyExpr: ...
def dt_day_of_week(self) -> PyExpr: ...
Expand Down Expand Up @@ -761,6 +762,7 @@ class PySeries:
def is_nan(self) -> PySeries: ...
def dt_date(self) -> PySeries: ...
def dt_day(self) -> PySeries: ...
def dt_hour(self) -> PySeries: ...
def dt_month(self) -> PySeries: ...
def dt_year(self) -> PySeries: ...
def dt_day_of_week(self) -> PySeries: ...
Expand Down
11 changes: 11 additions & 0 deletions daft/expressions/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,17 @@
"""
return Expression._from_pyexpr(self._expr.dt_day())

def hour(self) -> Expression:
"""Retrieves the day for a datetime column

Example:
>>> col("x").dt.day()

Returns:
Expression: a UInt32 expression with just the day extracted from a datetime column
"""
return Expression._from_pyexpr(self._expr.dt_hour())

Check warning on line 516 in daft/expressions/expressions.py

View check run for this annotation

Codecov / codecov/patch

daft/expressions/expressions.py#L516

Added line #L516 was not covered by tests

def month(self) -> Expression:
"""Retrieves the month for a datetime column

Expand Down
3 changes: 3 additions & 0 deletions daft/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,9 @@ def date(self) -> Series:
def day(self) -> Series:
return Series._from_pyseries(self._series.dt_day())

def hour(self) -> Series:
return Series._from_pyseries(self._series.dt_hour())

def month(self) -> Series:
return Series._from_pyseries(self._series.dt_month())

Expand Down
38 changes: 37 additions & 1 deletion src/daft-core/src/array/ops/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::{
DataType,
};
use arrow2::compute::arithmetics::ArraySub;
use chrono::NaiveDate;
use chrono::{NaiveDate, Timelike};
use common_error::{DaftError, DaftResult};

use super::as_arrow::AsArrow;
Expand Down Expand Up @@ -107,4 +107,40 @@ impl TimestampArray {
Int32Array::from((self.name(), Box::new(date_arrow))),
))
}

pub fn hour(&self) -> DaftResult<UInt32Array> {
let physical = self.physical.as_arrow();
let DataType::Timestamp(timeunit, tz) = self.data_type() else {
unreachable!("Timestamp array must have Timestamp datatype")
};
let tu = timeunit.to_arrow();
let date_arrow = match tz {
Some(tz) => match arrow2::temporal_conversions::parse_offset(tz) {
Ok(tz) => Ok(arrow2::array::UInt32Array::from_iter(physical.iter().map(
|ts| {
ts.map(|ts| {
arrow2::temporal_conversions::timestamp_to_datetime(*ts, tu, &tz).hour()
})
},
))),
Err(e) => Err(DaftError::TypeError(format!(
"Cannot parse timezone in Timestamp datatype: {}, error: {}",
tz, e
))),
},
None => Ok(arrow2::array::UInt32Array::from_iter(physical.iter().map(
|ts| {
ts.map(|ts| {
arrow2::temporal_conversions::timestamp_to_naive_datetime(*ts, tu).hour()
})
},
))),
}?;

Ok(UInt32Array::new(
std::sync::Arc::new(Field::new(self.name(), DataType::UInt32)),
Box::new(date_arrow),
)
.unwrap_or(UInt32Array::from(("hour", vec![0]))))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You shouldn't have to unwrap the UInt32Array::new and then wrap it in Ok. You should be able to return the result from UInt32Array::new directly!

UInt32Array::new(
            std::sync::Arc::new(Field::new(self.name(), DataType::UInt32)),
            Box::new(date_arrow),
        )

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, not sure what I was thinking. Fixing it now.

}
}
4 changes: 4 additions & 0 deletions src/daft-core/src/python/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,10 @@ impl PySeries {
Ok(self.series.dt_day()?.into())
}

pub fn dt_hour(&self) -> PyResult<Self> {
Ok(self.series.dt_hour()?.into())
}

pub fn dt_month(&self) -> PyResult<Self> {
Ok(self.series.dt_month()?.into())
}
Expand Down
13 changes: 13 additions & 0 deletions src/daft-core/src/series/ops/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,19 @@ impl Series {
}
}

pub fn dt_hour(&self) -> DaftResult<Self> {
match self.data_type() {
DataType::Timestamp(..) => {
let ts_array = self.downcast::<TimestampArray>()?;
Ok(ts_array.hour()?.into_series())
}
_ => Err(DaftError::ComputeError(format!(
"Can only run day() operation on temporal types, got {}",
self.data_type()
))),
}
}

pub fn dt_month(&self) -> DaftResult<Self> {
match self.data_type() {
DataType::Date => {
Expand Down
47 changes: 47 additions & 0 deletions src/daft-dsl/src/functions/temporal/hour.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use common_error::{DaftError, DaftResult};
use daft_core::{
datatypes::{DataType, Field},
schema::Schema,
series::Series,
};

use crate::Expr;

use super::super::FunctionEvaluator;

pub(super) struct HourEvaluator {}

impl FunctionEvaluator for HourEvaluator {
fn fn_name(&self) -> &'static str {
"hour"
}

fn to_field(&self, inputs: &[Expr], schema: &Schema, _: &Expr) -> DaftResult<Field> {
match inputs {
[input] => match input.to_field(schema) {
Ok(field) if field.dtype.is_temporal() => {
Ok(Field::new(field.name, DataType::UInt32))
}
Ok(field) => Err(DaftError::TypeError(format!(
"Expected input to hour to be temporal, got {}",
field.dtype
))),
Err(e) => Err(e),
},
_ => Err(DaftError::SchemaMismatch(format!(
"Expected 1 input arg, got {}",
inputs.len()
))),
}
}

fn evaluate(&self, inputs: &[Series], _: &Expr) -> DaftResult<Series> {
match inputs {
[input] => input.dt_hour(),
_ => Err(DaftError::ValueError(format!(
"Expected 1 input arg, got {}",
inputs.len()
))),
}
}
}
14 changes: 12 additions & 2 deletions src/daft-dsl/src/functions/temporal/mod.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
mod date;
mod day;
mod day_of_week;
mod hour;
mod month;
mod year;

use serde::{Deserialize, Serialize};

use crate::functions::temporal::{
date::DateEvaluator, day::DayEvaluator, day_of_week::DayOfWeekEvaluator, month::MonthEvaluator,
year::YearEvaluator,
date::DateEvaluator, day::DayEvaluator, day_of_week::DayOfWeekEvaluator, hour::HourEvaluator,
month::MonthEvaluator, year::YearEvaluator,
};
use crate::Expr;

Expand All @@ -17,6 +18,7 @@ use super::FunctionEvaluator;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum TemporalExpr {
Day,
Hour,
Month,
Year,
DayOfWeek,
Expand All @@ -29,6 +31,7 @@ impl TemporalExpr {
use TemporalExpr::*;
match self {
Day => &DayEvaluator {},
Hour => &HourEvaluator {},
Month => &MonthEvaluator {},
Year => &YearEvaluator {},
DayOfWeek => &DayOfWeekEvaluator {},
Expand All @@ -51,6 +54,13 @@ pub fn day(input: &Expr) -> Expr {
}
}

pub fn hour(input: &Expr) -> Expr {
Expr::Function {
func: super::FunctionExpr::Temporal(TemporalExpr::Hour),
inputs: vec![input.clone()],
}
}

pub fn month(input: &Expr) -> Expr {
Expr::Function {
func: super::FunctionExpr::Temporal(TemporalExpr::Month),
Expand Down
5 changes: 5 additions & 0 deletions src/daft-dsl/src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,11 @@ impl PyExpr {
Ok(day(&self.expr).into())
}

pub fn dt_hour(&self) -> PyResult<Self> {
use functions::temporal::hour;
Ok(hour(&self.expr).into())
}

pub fn dt_month(&self) -> PyResult<Self> {
use functions::temporal::month;
Ok(month(&self.expr).into())
Expand Down
19 changes: 19 additions & 0 deletions tests/series/test_temporal_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,25 @@ def ts_maker(d):
assert expected == days.to_pylist()


def test_series_timestamp_hour() -> None:
from datetime import datetime

def ts_maker(h):
if h is None:
return None
return datetime(2023, 1, 26, h, 1, 1)

input = [1, 5, 14, None, 23, None, 21]

input_ts = list(map(ts_maker, input))
s = Series.from_pylist(input_ts).cast(DataType.timestamp(TimeUnit.ms()))
days = s.dt.hour()

assert days.datatype() == DataType.uint32()

assert input == days.to_pylist()


@pytest.mark.parametrize("tz", [None, "UTC", "+08:00", "Asia/Singapore"])
def test_series_timestamp_month_operation(tz) -> None:
from datetime import datetime
Expand Down
Loading