From 35fe073d53ad5a8c1e1ccd75bef46af7662f735f Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Wed, 7 Feb 2024 17:54:30 +0100
Subject: [PATCH 1/3] refactor(python): Minor refactor of `DataFrame.to_numpy`
 structured code

---
 py-polars/polars/dataframe/frame.py | 71 +++++++++++++++--------------
 1 file changed, 36 insertions(+), 35 deletions(-)

diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
index 2e27ace8015a..10ed60a10cc7 100644
--- a/py-polars/polars/dataframe/frame.py
+++ b/py-polars/polars/dataframe/frame.py
@@ -2076,8 +2076,11 @@ def to_numpy(
         Parameters
         ----------
         structured
-            Optionally return a structured array, with field names and
-            dtypes that correspond to the DataFrame schema.
+            Return a `structured array`_ with a data type that corresponds to the
+            DataFrame schema. If set to `False` (default), a regular `ndarray` is
+            returned instead.
+
+            .. structured array: https://numpy.org/doc/stable/user/basics.rec.html
         order
             The index order of the returned NumPy array, either C-like or
             Fortran-like. In general, using the Fortran-like index order is faster.
@@ -2130,36 +2133,33 @@ def to_numpy(
                   dtype=[('foo', 'u1'), ('bar', '<f4'), ('ham', '<U1')])
         """
         if structured:
-            # see: https://numpy.org/doc/stable/user/basics.rec.html
             arrays = []
-            for c, tp in self.schema.items():
-                s = self[c]
-                a = s.to_numpy(use_pyarrow=use_pyarrow)
-                arrays.append(
-                    a.astype(str, copy=False)
-                    if tp == String and not s.null_count()
-                    else a
-                )
-
-            out = np.empty(
-                len(self), dtype=list(zip(self.columns, (a.dtype for a in arrays)))
-            )
+            struct_dtype = []
+            for s in self.iter_columns():
+                arr = s.to_numpy(use_pyarrow=use_pyarrow)
+                if s.dtype == String and s.null_count() == 0:
+                    arr = arr.astype(str, copy=False)
+                arrays.append(arr)
+                struct_dtype.append((s.name, arr.dtype))
+
+            out = np.empty(self.height, dtype=struct_dtype)
             for idx, c in enumerate(self.columns):
                 out[c] = arrays[idx]
-        else:
-            if order == "fortran":
-                array = self._df.to_numpy_view()
-                if array is not None:
-                    return array
-
-            out = self._df.to_numpy(order)
-            if out is None:
-                return np.vstack(
-                    [
-                        self.to_series(i).to_numpy(use_pyarrow=use_pyarrow)
-                        for i in range(self.width)
-                    ]
-                ).T
+            return out
+
+        if order == "fortran":
+            array = self._df.to_numpy_view()
+            if array is not None:
+                return array
+
+        out = self._df.to_numpy(order)
+        if out is None:
+            return np.vstack(
+                [
+                    self.to_series(i).to_numpy(use_pyarrow=use_pyarrow)
+                    for i in range(self.width)
+                ]
+            ).T
 
         return out
 
@@ -9888,17 +9888,17 @@ def iter_rows(
 
     def iter_columns(self) -> Iterator[Series]:
         """
-        Returns an iterator over the DataFrame's columns.
+        Returns an iterator over the columns of this DataFrame.
+
+        Yields
+        ------
+        Series
 
         Notes
         -----
         Consider whether you can use :func:`all` instead.
         If you can, it will be more efficient.
 
-        Returns
-        -------
-        Iterator of Series.
-
         Examples
         --------
         >>> df = pl.DataFrame(
@@ -9939,7 +9939,8 @@ def iter_columns(self) -> Iterator[Series]:
         │ 10  ┆ 12  │
         └─────┴─────┘
         """
-        return (wrap_s(s) for s in self._df.get_columns())
+        for s in self._df.get_columns():
+            yield wrap_s(s)
 
     def iter_slices(self, n_rows: int = 10_000) -> Iterator[DataFrame]:
         r"""

From a1a756df6a25a042e3e2bce9e7d26feb8fae1e89 Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Wed, 7 Feb 2024 17:57:16 +0100
Subject: [PATCH 2/3] Reword `DataFrame.to_numpy` docstring summary

---
 py-polars/polars/dataframe/frame.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
index 10ed60a10cc7..c9ab1e4edb42 100644
--- a/py-polars/polars/dataframe/frame.py
+++ b/py-polars/polars/dataframe/frame.py
@@ -2069,15 +2069,13 @@ def to_numpy(
         use_pyarrow: bool = True,
     ) -> np.ndarray[Any, Any]:
         """
-        Convert DataFrame to a 2D NumPy array.
-
-        This operation clones data.
+        Convert this DataFrame to a NumPy ndarray.
 
         Parameters
         ----------
         structured
             Return a `structured array`_ with a data type that corresponds to the
-            DataFrame schema. If set to `False` (default), a regular `ndarray` is
+            DataFrame schema. If set to `False` (default), a 2D ndarray is
             returned instead.
 
             .. structured array: https://numpy.org/doc/stable/user/basics.rec.html

From 6ed3025946c16f6afa3df8bf74e9d7490e175dac Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Wed, 7 Feb 2024 18:28:12 +0100
Subject: [PATCH 3/3] Fix reST link target syntax in `to_numpy` docstring

---
 py-polars/polars/dataframe/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
index c9ab1e4edb42..46bd46c26f41 100644
--- a/py-polars/polars/dataframe/frame.py
+++ b/py-polars/polars/dataframe/frame.py
@@ -2078,7 +2078,7 @@ def to_numpy(
             DataFrame schema. If set to `False` (default), a 2D ndarray is
             returned instead.
 
-            .. structured array: https://numpy.org/doc/stable/user/basics.rec.html
+            .. _structured array: https://numpy.org/doc/stable/user/basics.rec.html
         order
             The index order of the returned NumPy array, either C-like or
             Fortran-like. In general, using the Fortran-like index order is faster.