From 35fe073d53ad5a8c1e1ccd75bef46af7662f735f Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 7 Feb 2024 17:54:30 +0100 Subject: [PATCH 1/3] refactor(python): Minor refactor of `DataFrame.to_numpy` structured code --- py-polars/polars/dataframe/frame.py | 71 +++++++++++++++-------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 2e27ace8015a..10ed60a10cc7 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -2076,8 +2076,11 @@ def to_numpy( Parameters ---------- structured - Optionally return a structured array, with field names and - dtypes that correspond to the DataFrame schema. + Return a `structured array`_ with a data type that corresponds to the + DataFrame schema. If set to `False` (default), a regular `ndarray` is + returned instead. + + .. structured array: https://numpy.org/doc/stable/user/basics.rec.html order The index order of the returned NumPy array, either C-like or Fortran-like. In general, using the Fortran-like index order is faster. @@ -2130,36 +2133,33 @@ def to_numpy( dtype=[('foo', 'u1'), ('bar', ' Iterator[Series]: """ - Returns an iterator over the DataFrame's columns. + Returns an iterator over the columns of this DataFrame. + + Yields + ------ + Series Notes ----- Consider whether you can use :func:`all` instead. If you can, it will be more efficient. - Returns - ------- - Iterator of Series. - Examples -------- >>> df = pl.DataFrame( @@ -9939,7 +9939,8 @@ def iter_columns(self) -> Iterator[Series]: │ 10 ┆ 12 │ └─────┴─────┘ """ - return (wrap_s(s) for s in self._df.get_columns()) + for s in self._df.get_columns(): + yield wrap_s(s) def iter_slices(self, n_rows: int = 10_000) -> Iterator[DataFrame]: r""" From a1a756df6a25a042e3e2bce9e7d26feb8fae1e89 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 7 Feb 2024 17:57:16 +0100 Subject: [PATCH 2/3] Wording --- py-polars/polars/dataframe/frame.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 10ed60a10cc7..c9ab1e4edb42 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -2069,15 +2069,13 @@ def to_numpy( use_pyarrow: bool = True, ) -> np.ndarray[Any, Any]: """ - Convert DataFrame to a 2D NumPy array. - - This operation clones data. + Convert this DataFrame to a NumPy ndarray. Parameters ---------- structured Return a `structured array`_ with a data type that corresponds to the - DataFrame schema. If set to `False` (default), a regular `ndarray` is + DataFrame schema. If set to `False` (default), a 2D ndarray is returned instead. .. structured array: https://numpy.org/doc/stable/user/basics.rec.html From 6ed3025946c16f6afa3df8bf74e9d7490e175dac Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 7 Feb 2024 18:28:12 +0100 Subject: [PATCH 3/3] Formatting --- py-polars/polars/dataframe/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index c9ab1e4edb42..46bd46c26f41 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -2078,7 +2078,7 @@ def to_numpy( DataFrame schema. If set to `False` (default), a 2D ndarray is returned instead. - .. structured array: https://numpy.org/doc/stable/user/basics.rec.html + .. _structured array: https://numpy.org/doc/stable/user/basics.rec.html order The index order of the returned NumPy array, either C-like or Fortran-like. In general, using the Fortran-like index order is faster.