Add `log` and `log10` to `Expr` and `Series`.

Both methods are implemented for the pandas-like backend (via NumPy) and the
PyArrow backend (via `pyarrow.compute`), listed in the API reference, and
covered by new tests. The expression tests mark dask as xfail.

diff --git a/docs/api-reference/expr.md b/docs/api-reference/expr.md
index 7188b2c36..1c5470b52 100644
--- a/docs/api-reference/expr.md
+++ b/docs/api-reference/expr.md
@@ -27,6 +27,8 @@
         - is_null
         - is_unique
         - len
+        - log
+        - log10
         - max
         - mean
         - min
diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md
index 9868e7b98..265360c4a 100644
--- a/docs/api-reference/series.md
+++ b/docs/api-reference/series.md
@@ -33,6 +33,8 @@
         - is_unique
         - item
         - len
+        - log
+        - log10
         - max
         - mean
         - min
diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
index 1aceb576f..613166cf1 100644
--- a/narwhals/_arrow/expr.py
+++ b/narwhals/_arrow/expr.py
@@ -332,6 +332,12 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
     def mode(self: Self) -> Self:
         return reuse_series_implementation(self, "mode")
 
+    def log(self: Self, base: float) -> Self:
+        return reuse_series_implementation(self, "log", base)
+
+    def log10(self: Self) -> Self:
+        return reuse_series_implementation(self, "log10")
+
     @property
     def dt(self: Self) -> ArrowExprDateTimeNamespace:
         return ArrowExprDateTimeNamespace(self)
diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
index 90e800796..e43a3f274 100644
--- a/narwhals/_arrow/series.py
+++ b/narwhals/_arrow/series.py
@@ -702,6 +702,16 @@ def mode(self: Self) -> ArrowSeries:
             plx.col(col_token) == plx.col(col_token).max()
         )[self.name]
 
+    def log(self: Self, base: float) -> Self:
+        import pyarrow.compute as pc  # ignore-banned-import()
+
+        return self._from_native_series(pc.logb(self._native_series, base))
+
+    def log10(self: Self) -> Self:
+        import pyarrow.compute as pc  # ignore-banned-import()
+
+        return self._from_native_series(pc.log10(self._native_series))
+
     @property
     def shape(self) -> tuple[int]:
         return (len(self._native_series),)
diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py
index 06be54394..2174f43d1 100644
--- a/narwhals/_pandas_like/expr.py
+++ b/narwhals/_pandas_like/expr.py
@@ -347,6 +347,12 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self:
     def mode(self: Self) -> Self:
         return reuse_series_implementation(self, "mode")
 
+    def log(self: Self, base: float) -> Self:
+        return reuse_series_implementation(self, "log", base)
+
+    def log10(self: Self) -> Self:
+        return reuse_series_implementation(self, "log10")
+
     @property
     def str(self: Self) -> PandasLikeExprStringNamespace:
         return PandasLikeExprStringNamespace(self)
diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py
index 0092e97c8..98994286a 100644
--- a/narwhals/_pandas_like/series.py
+++ b/narwhals/_pandas_like/series.py
@@ -665,6 +665,16 @@ def mode(self: Self) -> Self:
         result.name = native_series.name
         return self._from_native_series(result)
 
+    def log(self: Self, base: float) -> Self:
+        import numpy as np  # ignore-banned-import()
+
+        return self._from_native_series(np.log(self._native_series) / np.log(base))
+
+    def log10(self: Self) -> Self:
+        import numpy as np  # ignore-banned-import()
+
+        return self._from_native_series(np.log10(self._native_series))
+
     @property
     def str(self) -> PandasLikeSeriesStringNamespace:
         return PandasLikeSeriesStringNamespace(self)
diff --git a/narwhals/expr.py b/narwhals/expr.py
index b04a471da..13b4c5b7a 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import math
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Callable
@@ -1953,6 +1954,104 @@ def mode(self: Self) -> Self:
         """
         return self.__class__(lambda plx: self._call(plx).mode())
 
+    def log(self: Self, base: float = math.e) -> Self:
+        r"""
+        Compute the logarithm to a given base.
+
+        Arguments:
+            base: Given base, defaults to `e`
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = {"values": [1, 2, 3]}
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(df):
+            ...     return df.with_columns(values_log_2=nw.col("values").log(base=2))
+
+            We can then pass any supported library such as pandas, Polars (eager),
+            or PyArrow to `func`:
+
+            >>> func(pd.DataFrame(data))
+               values  values_log_2
+            0       1      0.000000
+            1       2      1.000000
+            2       3      1.584963
+
+            >>> func(pl.DataFrame(data))
+            shape: (3, 2)
+            ┌────────┬──────────────┐
+            │ values ┆ values_log_2 │
+            │ ---    ┆ ---          │
+            │ i64    ┆ f64          │
+            ╞════════╪══════════════╡
+            │ 1      ┆ 0.0          │
+            │ 2      ┆ 1.0          │
+            │ 3      ┆ 1.584963     │
+            └────────┴──────────────┘
+            >>> func(pa.table(data))
+            pyarrow.Table
+            values: int64
+            values_log_2: double
+            ----
+            values: [[1,2,3]]
+            values_log_2: [[0,1,1.5849625007211563]]
+        """
+        return self.__class__(lambda plx: self._call(plx).log(base=base))
+
+    def log10(self: Self) -> Self:
+        r"""
+        Compute the base 10 logarithm of the input array, element-wise.
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = {"values": [1.0, 2.0, 4.0]}
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(df):
+            ...     return df.with_columns(values_log_10=nw.col("values").log10())
+
+            We can then pass any supported library such as pandas, Polars (eager),
+            or PyArrow to `func`:
+
+            >>> func(pd.DataFrame(data))
+               values  values_log_10
+            0     1.0        0.00000
+            1     2.0        0.30103
+            2     4.0        0.60206
+
+            >>> func(pl.DataFrame(data))
+            shape: (3, 2)
+            ┌────────┬───────────────┐
+            │ values ┆ values_log_10 │
+            │ ---    ┆ ---           │
+            │ f64    ┆ f64           │
+            ╞════════╪═══════════════╡
+            │ 1.0    ┆ 0.0           │
+            │ 2.0    ┆ 0.30103       │
+            │ 4.0    ┆ 0.60206       │
+            └────────┴───────────────┘
+            >>> func(pa.table(data))
+            pyarrow.Table
+            values: double
+            values_log_10: double
+            ----
+            values: [[1,2,4]]
+            values_log_10: [[0,0.3010299956639812,0.6020599913279624]]
+
+        """
+        return self.__class__(lambda plx: self._call(plx).log10())
+
     @property
     def str(self: Self) -> ExprStringNamespace:
         return ExprStringNamespace(self)
diff --git a/narwhals/series.py b/narwhals/series.py
index 441900a31..e9a8e8446 100644
--- a/narwhals/series.py
+++ b/narwhals/series.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import math
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Callable
@@ -2406,6 +2407,102 @@ def mode(self: Self) -> Self:
         """
         return self._from_compliant_series(self._compliant_series.mode())
 
+    def log(self: Self, base: float = math.e) -> Self:
+        r"""
+        Compute the logarithm to a given base.
+
+        Arguments:
+            base: Given base, defaults to `e`
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = [1, 2, 3]
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(s):
+            ...     return s.log(base=2)
+
+            We can then pass any supported library such as pandas, Polars (eager),
+            or PyArrow to `func`:
+
+            >>> func(pd.Series(data))
+            0    0.000000
+            1    1.000000
+            2    1.584963
+            dtype: float64
+
+            >>> func(pl.Series(data))  # doctest: +NORMALIZE_WHITESPACE
+            shape: (3,)
+            Series: '' [f64]
+            [
+                0.0
+                1.0
+                1.584963
+            ]
+            >>> func(pa.chunked_array([data]))  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                0,
+                1,
+                1.5849625007211563
+              ]
+            ]
+        """
+        return self._from_compliant_series(self._compliant_series.log(base=base))
+
+    def log10(self: Self) -> Self:
+        r"""
+        Compute the base 10 logarithm of the input array, element-wise.
+
+        Examples:
+            >>> import narwhals as nw
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> data = [1.0, 2.0, 4.0]
+
+            We define a library agnostic function:
+
+            >>> @nw.narwhalify
+            ... def func(s):
+            ...     return s.log10()
+
+            We can then pass any supported library such as pandas, Polars (eager),
+            or PyArrow to `func`:
+
+            >>> func(pd.Series(data))
+            0    0.00000
+            1    0.30103
+            2    0.60206
+            dtype: float64
+
+            >>> func(pl.Series(data))  # doctest: +NORMALIZE_WHITESPACE
+            shape: (3,)
+            Series: '' [f64]
+            [
+                0.0
+                0.30103
+                0.60206
+            ]
+            >>> func(pa.chunked_array([data]))  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                0,
+                0.3010299956639812,
+                0.6020599913279624
+              ]
+            ]
+
+        """
+        return self._from_compliant_series(self._compliant_series.log10())
+
     @property
     def str(self) -> SeriesStringNamespace:
         return SeriesStringNamespace(self)
diff --git a/pyproject.toml b/pyproject.toml
index f2109e117..d9683ba74 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -115,7 +115,8 @@ filterwarnings = [
   'ignore:.*You are using pyarrow version',
   'ignore:.*but when imported by',
   'ignore:Distributing .*This may take some time',
-  'ignore:.*The default coalesce behavior'
+  'ignore:.*The default coalesce behavior',
+  'ignore:(divide by zero|invalid value) encountered in log:RuntimeWarning'
 ]
 xfail_strict = true
 markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"]
diff --git a/tests/expr_and_series/log10_test.py b/tests/expr_and_series/log10_test.py
new file mode 100644
index 000000000..3278b31ce
--- /dev/null
+++ b/tests/expr_and_series/log10_test.py
@@ -0,0 +1,28 @@
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+import narwhals.stable.v1 as nw
+from tests.utils import Constructor
+from tests.utils import compare_dicts
+
+data = {"a": [-1, 0, 1, 10, 100.0]}
+
+
+def test_log10_expr(constructor: Constructor, request: pytest.FixtureRequest) -> None:
+    if "dask" in str(constructor):
+        request.applymarker(pytest.mark.xfail)
+
+    df = nw.from_native(constructor(data))
+    result = df.select(nw.col("a").log10())
+    expected = {"a": [float("nan"), float("-inf"), 0, 1, 2]}
+    compare_dicts(result, expected)
+
+
+def test_log10_series(constructor_eager: Any) -> None:
+    series = nw.from_native(constructor_eager(data), eager_only=True)["a"]
+    result = series.log10()
+    expected = {"a": [float("nan"), float("-inf"), 0, 1, 2]}
+    compare_dicts({"a": result}, expected)
diff --git a/tests/expr_and_series/log_test.py b/tests/expr_and_series/log_test.py
new file mode 100644
index 000000000..4d1b9952f
--- /dev/null
+++ b/tests/expr_and_series/log_test.py
@@ -0,0 +1,28 @@
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+import narwhals.stable.v1 as nw
+from tests.utils import Constructor
+from tests.utils import compare_dicts
+
+data = {"a": [-1, 0, 1, 2, 4.0]}
+
+
+def test_log_expr(constructor: Constructor, request: pytest.FixtureRequest) -> None:
+    if "dask" in str(constructor):
+        request.applymarker(pytest.mark.xfail)
+
+    df = nw.from_native(constructor(data))
+    result = df.select(nw.col("a").log(base=2))
+    expected = {"a": [float("nan"), float("-inf"), 0, 1, 2]}
+    compare_dicts(result, expected)
+
+
+def test_log_series(constructor_eager: Any) -> None:
+    series = nw.from_native(constructor_eager(data), eager_only=True)["a"]
+    result = series.log(base=2)
+    expected = {"a": [float("nan"), float("-inf"), 0, 1, 2]}
+    compare_dicts({"a": result}, expected)