From cc6d0590cb36f0b171c78866484ff3143964980e Mon Sep 17 00:00:00 2001 From: Ben Reeves Date: Wed, 31 Jan 2024 21:32:55 -0600 Subject: [PATCH 1/2] docs(python): clarify behavior of `DataFrame.rows_by_key`. When I read the docs for this method for the first time, it was not at all clear to me how the returned dictionary was constructed until I scrolled all the way down to the examples. And even then, it took me a little while to understand. I edited the description to be a bit clearer about how the dictionary is structured, while trying not to be too redundant with the examples and parameter docs. While I was here, I also added a short blurb to the `rows` docs. --- py-polars/polars/dataframe/frame.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 46bd46c26f41..dbdac17729a5 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -9580,6 +9580,9 @@ def rows( """ Returns all data in the DataFrame as a list of rows of python-native values. + By default, each row is a tuple of values in the same order as `self.columns`. + You can set `named=True` to get dictionaries instead. + Parameters ---------- named @@ -9643,7 +9646,10 @@ def rows_by_key( unique: bool = False, ) -> dict[Any, Iterable[Any]]: """ - Returns DataFrame data as a keyed dictionary of python-native values. + Returns all data as a dictionary of python-native values keyed by some column. + + This method is like `rows`, but instead of returning rows in a flat list, rows + are grouped by the values in the `key` column(s) and returned as a dictionary. 
Note that this method should not be used in place of native operations, due to the high cost of materializing all frame data out into a dictionary; it should From 337e9bf631c3af0a67b22ff6eda7b741d814aa8c Mon Sep 17 00:00:00 2001 From: alexander-beedie Date: Fri, 9 Feb 2024 22:35:34 +0400 Subject: [PATCH 2/2] minor tweaks --- py-polars/polars/dataframe/frame.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index dbdac17729a5..d5895496d224 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -9580,8 +9580,9 @@ def rows( """ Returns all data in the DataFrame as a list of rows of python-native values. - By default, each row is a tuple of values in the same order as `self.columns`. - You can set `named=True` to get dictionaries instead. + By default, each row is returned as a tuple of values given in the same order + as the frame columns. Setting `named=True` will return each row as a dictionary + instead. Parameters ---------- @@ -9601,12 +9602,13 @@ def rows( -------- Row-iteration is not optimal as the underlying data is stored in columnar form; where possible, prefer export via one of the dedicated export/output methods. - Where possible you should also consider using `iter_rows` instead to avoid - materialising all the data at once. + You should also consider using `iter_rows` instead, to avoid materialising all + the data at once; there is little performance difference between the two, but + peak memory can be reduced if processing rows in batches. Returns ------- - list of tuples (default) or dictionaries of row values + list of row value tuples (default), or list of dictionaries (if `named=True`). See Also --------