Skip to content

Commit

Permalink
feat: Series.str.to_datetime (#1131)
Browse files Browse the repository at this point in the history
* add series to_datetime to nw str namespace, add pyarrow example

* add to_datetime to api-ref
  • Loading branch information
raisadz authored Oct 4, 2024
1 parent 915d84c commit a2a22ce
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 3 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/series_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
- slice
- starts_with
- strip_chars
- to_datetime
- tail
show_source: false
show_bases: false
14 changes: 11 additions & 3 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2516,17 +2516,20 @@ def to_datetime(self, format: str) -> Expr: # noqa: A002
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": ["2020-01-01", "2020-01-02"]})
>>> df_pl = pl.DataFrame({"a": ["2020-01-01", "2020-01-02"]})
>>> data = ["2020-01-01", "2020-01-02"]
>>> df_pd = pd.DataFrame({"a": data})
>>> df_pl = pl.DataFrame({"a": data})
>>> df_pa = pa.table({"a": data})
We define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").str.to_datetime(format="%Y-%m-%d"))
We can then pass either pandas or Polars to `func`:
We can then pass any supported library such as pandas, Polars, or PyArrow:
>>> func(df_pd)
a
Expand All @@ -2542,6 +2545,11 @@ def to_datetime(self, format: str) -> Expr: # noqa: A002
│ 2020-01-01 00:00:00 │
│ 2020-01-02 00:00:00 │
└─────────────────────┘
>>> func(df_pa)
pyarrow.Table
a: timestamp[us]
----
a: [[2020-01-01 00:00:00.000000,2020-01-02 00:00:00.000000]]
"""
return self._expr.__class__(
lambda plx: self._expr._call(plx).str.to_datetime(format=format)
Expand Down
57 changes: 57 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3083,6 +3083,63 @@ def to_lowercase(self) -> Series:
self._narwhals_series._compliant_series.str.to_lowercase()
)

def to_datetime(self, format: str) -> Series:  # noqa: A002
    """
    Convert a Series of strings into a Series of Datetime dtype.

    The call is forwarded to the backend ("compliant") series'
    `str.to_datetime`, and the result is wrapped back into a Narwhals
    Series.

    Notes:
        pandas defaults to nanosecond time unit, Polars to microsecond.
        Prior to pandas 2.0, nanoseconds were the only time unit supported
        in pandas, with no ability to set any other one. Support for
        setting the time unit in pandas (where the installed version
        permits it) is planned.

    Arguments:
        format: Format to parse strings with. Must be passed, as different
            dataframe libraries have different ways of auto-inferring
            formats.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> import narwhals as nw
        >>> data = ["2020-01-01", "2020-01-02"]
        >>> s_pd = pd.Series(data)
        >>> s_pl = pl.Series(data)
        >>> s_pa = pa.chunked_array([data])

        We define a dataframe-agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return s.str.to_datetime(format="%Y-%m-%d")

        We can then pass any supported library such as pandas, Polars, or PyArrow:

        >>> func(s_pd)
        0   2020-01-01
        1   2020-01-02
        dtype: datetime64[ns]
        >>> func(s_pl)  # doctest: +NORMALIZE_WHITESPACE
        shape: (2,)
        Series: '' [datetime[μs]]
        [
           2020-01-01 00:00:00
           2020-01-02 00:00:00
        ]
        >>> func(s_pa)  # doctest: +ELLIPSIS
        <pyarrow.lib.ChunkedArray object at 0x...>
        [
          [
            2020-01-01 00:00:00.000000,
            2020-01-02 00:00:00.000000
          ]
        ]
    """
    parent = self._narwhals_series
    # Let the backend implementation do the actual parsing.
    parsed = parent._compliant_series.str.to_datetime(format=format)
    # Re-wrap the backend result so callers keep working with Narwhals objects.
    return parent._from_compliant_series(parsed)


class SeriesDateTimeNamespace:
def __init__(self, series: Series) -> None:
Expand Down
16 changes: 16 additions & 0 deletions tests/expr_and_series/str/to_datetime_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any

import narwhals.stable.v1 as nw
from tests.utils import Constructor

Expand All @@ -18,3 +20,17 @@ def test_to_datetime(constructor: Constructor) -> None:
.item(row=0, column="b")
)
assert str(result) == expected


def test_to_datetime_series(constructor_eager: Any) -> None:
    """Check that `Series.str.to_datetime` parses the first value of column "a".

    The expected string depends on the backend: constructors whose string
    form contains "cudf" are expected to render the value in ISO form with
    nanosecond digits, all others as a plain "date time" string.
    """
    is_cudf = "cudf" in str(constructor_eager)
    if is_cudf:  # pragma: no cover
        expected = "2020-01-01T12:34:56.000000000"
    else:
        expected = "2020-01-01 12:34:56"

    # Build an eager Narwhals frame, then parse the string column "a".
    frame = nw.from_native(constructor_eager(data), eager_only=True)
    parsed = frame["a"].str.to_datetime(format="%Y-%m-%dT%H:%M:%S")
    result = parsed.item(0)
    assert str(result) == expected

0 comments on commit a2a22ce

Please sign in to comment.