Skip to content

Commit

Permalink
feat: add Expr and Series log and log10 methods
Browse files Browse the repository at this point in the history
  • Loading branch information
FBruzzesi committed Sep 23, 2024
1 parent 3f1619d commit 8a980b7
Show file tree
Hide file tree
Showing 11 changed files with 290 additions and 1 deletion.
2 changes: 2 additions & 0 deletions docs/api-reference/expr.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
- is_null
- is_unique
- len
- log
- log10
- max
- mean
- min
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
- is_unique
- item
- len
- log
- log10
- max
- mean
- min
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,12 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
def mode(self: Self) -> Self:
    """Build the `mode` expression by delegating to `reuse_series_implementation`."""
    method_name = "mode"
    return reuse_series_implementation(self, method_name)

def log(self: Self, base: float) -> Self:
    """Build the `log` expression, forwarding `base` to the series-level method.

    Arguments:
        base: Base of the logarithm, passed positionally to the series `log`.
    """
    method_name = "log"
    return reuse_series_implementation(self, method_name, base)

def log10(self: Self) -> Self:
    """Build the `log10` expression by delegating to `reuse_series_implementation`."""
    method_name = "log10"
    return reuse_series_implementation(self, method_name)

@property
def dt(self: Self) -> ArrowExprDateTimeNamespace:
    """Return an `ArrowExprDateTimeNamespace` wrapping this expression."""
    namespace = ArrowExprDateTimeNamespace(self)
    return namespace
Expand Down
10 changes: 10 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,16 @@ def mode(self: Self) -> ArrowSeries:
plx.col(col_token) == plx.col(col_token).max()
)[self.name]

def log(self: Self, base: float) -> Self:
    """Compute the logarithm of the native series in the given base.

    Arguments:
        base: Base of the logarithm, forwarded to `pyarrow.compute.logb`.

    Returns:
        The result of `pc.logb` wrapped via `_from_native_series`.
    """
    import pyarrow.compute as pc  # ignore-banned-import()

    native_result = pc.logb(self._native_series, base)
    return self._from_native_series(native_result)

def log10(self: Self) -> Self:
    """Compute the base-10 logarithm of the native series.

    Returns:
        The result of `pyarrow.compute.log10` wrapped via `_from_native_series`.
    """
    import pyarrow.compute as pc  # ignore-banned-import()

    native_result = pc.log10(self._native_series)
    return self._from_native_series(native_result)

@property
def shape(self) -> tuple[int]:
    """Return a one-element tuple holding the length of the native series."""
    n_rows = len(self._native_series)
    return (n_rows,)
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,12 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self:
def mode(self: Self) -> Self:
    """Build the `mode` expression by delegating to `reuse_series_implementation`."""
    method_name = "mode"
    return reuse_series_implementation(self, method_name)

def log(self: Self, base: float) -> Self:
    """Build the `log` expression, forwarding `base` to the series-level method.

    Arguments:
        base: Base of the logarithm, passed positionally to the series `log`.
    """
    method_name = "log"
    return reuse_series_implementation(self, method_name, base)

def log10(self: Self) -> Self:
    """Build the `log10` expression by delegating to `reuse_series_implementation`."""
    method_name = "log10"
    return reuse_series_implementation(self, method_name)

@property
def str(self: Self) -> PandasLikeExprStringNamespace:
    """Return a `PandasLikeExprStringNamespace` wrapping this expression."""
    namespace = PandasLikeExprStringNamespace(self)
    return namespace
Expand Down
10 changes: 10 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,16 @@ def mode(self: Self) -> Self:
result.name = native_series.name
return self._from_native_series(result)

def log(self: Self, base: float) -> Self:
    """Compute the logarithm of the native series in the given base.

    Uses the change-of-base identity: the natural log of the series divided
    by the natural log of `base`.

    Arguments:
        base: Base of the logarithm.

    Returns:
        The quotient wrapped via `_from_native_series`.
    """
    import numpy as np  # ignore-banned-import()

    natural_log = np.log(self._native_series)
    scale = np.log(base)
    return self._from_native_series(natural_log / scale)

def log10(self: Self) -> Self:
    """Compute the base-10 logarithm of the native series.

    Returns:
        The result of `numpy.log10` wrapped via `_from_native_series`.
    """
    import numpy as np  # ignore-banned-import()

    native_result = np.log10(self._native_series)
    return self._from_native_series(native_result)

@property
def str(self) -> PandasLikeSeriesStringNamespace:
    """Return a `PandasLikeSeriesStringNamespace` wrapping this series."""
    namespace = PandasLikeSeriesStringNamespace(self)
    return namespace
Expand Down
99 changes: 99 additions & 0 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import math
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
Expand Down Expand Up @@ -1953,6 +1954,104 @@ def mode(self: Self) -> Self:
"""
return self.__class__(lambda plx: self._call(plx).mode())

def log(self: Self, base: float = math.e) -> Self:
    r"""
    Compute the logarithm of the expression to a given base.

    Arguments:
        base: Base of the logarithm; defaults to `e` (natural logarithm).

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = {"values": [1, 2, 3]}

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.with_columns(values_log_2=nw.col("values").log(base=2))

        We can then pass any supported library such as pandas, Polars (eager),
        or PyArrow to `func`:

        >>> func(pd.DataFrame(data))
           values  values_log_2
        0       1      0.000000
        1       2      1.000000
        2       3      1.584963
        >>> func(pl.DataFrame(data))
        shape: (3, 2)
        ┌────────┬──────────────┐
        │ values ┆ values_log_2 │
        │ ---    ┆ ---          │
        │ i64    ┆ f64          │
        ╞════════╪══════════════╡
        │ 1      ┆ 0.0          │
        │ 2      ┆ 1.0          │
        │ 3      ┆ 1.584963     │
        └────────┴──────────────┘
        >>> func(pa.table(data))
        pyarrow.Table
        values: int64
        values_log_2: double
        ----
        values: [[1,2,3]]
        values_log_2: [[0,1,1.5849625007211563]]
    """

    def _apply_log(plx: Any) -> Any:
        # Resolve this expression for the backend, then call its `log`.
        return self._call(plx).log(base=base)

    return self.__class__(_apply_log)

def log10(self: Self) -> Self:
    r"""
    Compute the base-10 logarithm of the expression, element-wise.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = {"values": [1.0, 2.0, 4.0]}

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.with_columns(values_log_10=nw.col("values").log10())

        We can then pass any supported library such as pandas, Polars (eager),
        or PyArrow to `func`:

        >>> func(pd.DataFrame(data))
           values  values_log_10
        0     1.0        0.00000
        1     2.0        0.30103
        2     4.0        0.60206
        >>> func(pl.DataFrame(data))
        shape: (3, 2)
        ┌────────┬───────────────┐
        │ values ┆ values_log_10 │
        │ ---    ┆ ---           │
        │ f64    ┆ f64           │
        ╞════════╪═══════════════╡
        │ 1.0    ┆ 0.0           │
        │ 2.0    ┆ 0.30103       │
        │ 4.0    ┆ 0.60206       │
        └────────┴───────────────┘
        >>> func(pa.table(data))
        pyarrow.Table
        values: double
        values_log_10: double
        ----
        values: [[1,2,4]]
        values_log_10: [[0,0.3010299956639812,0.6020599913279624]]
    """

    def _apply_log10(plx: Any) -> Any:
        # Resolve this expression for the backend, then call its `log10`.
        return self._call(plx).log10()

    return self.__class__(_apply_log10)

@property
def str(self: Self) -> ExprStringNamespace:
    """Return an `ExprStringNamespace` wrapping this expression."""
    namespace = ExprStringNamespace(self)
    return namespace
Expand Down
97 changes: 97 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import math
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
Expand Down Expand Up @@ -2406,6 +2407,102 @@ def mode(self: Self) -> Self:
"""
return self._from_compliant_series(self._compliant_series.mode())

def log(self: Self, base: float = math.e) -> Self:
    r"""
    Compute the logarithm of the series to a given base.

    Arguments:
        base: Base of the logarithm; defaults to `e` (natural logarithm).

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = [1, 2, 3]

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return s.log(base=2)

        We can then pass any supported library such as pandas, Polars (eager),
        or PyArrow to `func`:

        >>> func(pd.Series(data))
        0    0.000000
        1    1.000000
        2    1.584963
        dtype: float64
        >>> func(pl.Series(data))  # doctest: +NORMALIZE_WHITESPACE
        shape: (3,)
        Series: '' [f64]
        [
           0.0
           1.0
           1.584963
        ]
        >>> func(pa.chunked_array([data]))  # doctest: +NORMALIZE_WHITESPACE
        <pyarrow.lib.ChunkedArray object at ...>
        [
          [
            0,
            1,
            1.5849625007211563
          ]
        ]
    """
    compliant_result = self._compliant_series.log(base=base)
    return self._from_compliant_series(compliant_result)

def log10(self: Self) -> Self:
    r"""
    Compute the base-10 logarithm of the series, element-wise.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = [1.0, 2.0, 4.0]

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return s.log10()

        We can then pass any supported library such as pandas, Polars (eager),
        or PyArrow to `func`:

        >>> func(pd.Series(data))
        0    0.00000
        1    0.30103
        2    0.60206
        dtype: float64
        >>> func(pl.Series(data))  # doctest: +NORMALIZE_WHITESPACE
        shape: (3,)
        Series: '' [f64]
        [
           0.0
           0.30103
           0.60206
        ]
        >>> func(pa.chunked_array([data]))  # doctest: +NORMALIZE_WHITESPACE
        <pyarrow.lib.ChunkedArray object at ...>
        [
          [
            0,
            0.3010299956639812,
            0.6020599913279624
          ]
        ]
    """
    compliant_result = self._compliant_series.log10()
    return self._from_compliant_series(compliant_result)

@property
def str(self) -> SeriesStringNamespace:
    """Return a `SeriesStringNamespace` wrapping this series."""
    namespace = SeriesStringNamespace(self)
    return namespace
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ filterwarnings = [
'ignore:.*You are using pyarrow version',
'ignore:.*but when imported by',
'ignore:Distributing .*This may take some time',
'ignore:.*The default coalesce behavior'
'ignore:.*The default coalesce behavior',
'ignore::RuntimeWarning'
]
xfail_strict = true
markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"]
Expand Down
28 changes: 28 additions & 0 deletions tests/expr_and_series/log10_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from __future__ import annotations

from typing import Any

import pytest

import narwhals.stable.v1 as nw
from tests.utils import Constructor
from tests.utils import compare_dicts

data = {"a": [-1, 0, 1, 10, 100.0]}


def test_log_expr(constructor: Constructor, request: pytest.FixtureRequest) -> None:
    """Check `Expr.log10` on a negative, a zero, and exact power-of-ten inputs."""
    # Dask constructors are marked as expected failures for this expression.
    is_dask = "dask" in str(constructor)
    if is_dask:
        request.applymarker(pytest.mark.xfail)

    frame = nw.from_native(constructor(data))
    expected = {"a": [float("nan"), float("-inf"), 0, 1, 2]}
    compare_dicts(frame.select(nw.col("a").log10()), expected)


def test_log_series(constructor_eager: Any) -> None:
    """Check `Series.log10` on a negative, a zero, and exact power-of-ten inputs."""
    frame = nw.from_native(constructor_eager(data), eager_only=True)
    column = frame["a"]
    expected = {"a": [float("nan"), float("-inf"), 0, 1, 2]}
    compare_dicts({"a": column.log10()}, expected)
28 changes: 28 additions & 0 deletions tests/expr_and_series/log_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from __future__ import annotations

from typing import Any

import pytest

import narwhals.stable.v1 as nw
from tests.utils import Constructor
from tests.utils import compare_dicts

data = {"a": [-1, 0, 1, 2, 4.0]}


def test_log_expr(constructor: Constructor, request: pytest.FixtureRequest) -> None:
    """Check `Expr.log(base=2)` on a negative, a zero, and exact power-of-two inputs."""
    # Dask constructors are marked as expected failures for this expression.
    is_dask = "dask" in str(constructor)
    if is_dask:
        request.applymarker(pytest.mark.xfail)

    frame = nw.from_native(constructor(data))
    expected = {"a": [float("nan"), float("-inf"), 0, 1, 2]}
    compare_dicts(frame.select(nw.col("a").log(base=2)), expected)


def test_log_series(constructor_eager: Any) -> None:
    """Check `Series.log(base=2)` on a negative, a zero, and exact power-of-two inputs."""
    frame = nw.from_native(constructor_eager(data), eager_only=True)
    column = frame["a"]
    expected = {"a": [float("nan"), float("-inf"), 0, 1, 2]}
    compare_dicts({"a": column.log(base=2)}, expected)

0 comments on commit 8a980b7

Please sign in to comment.