Skip to content

Commit

Permalink
Don't call len on DataFrame interchange protocol object (#3111)
Browse files: browse the repository at this point in the history
* Don't call `len` on DataFrame interchange protocol object

* Add check for `max_rows` of None
  • Loading branch information
jonmmease authored Jul 17, 2023
1 parent b7ae486 commit 139c86a
Showing 1 changed file with 25 additions and 13 deletions.
38 changes: 25 additions & 13 deletions altair/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,21 @@ def limit_rows(data: _TDataType, max_rows: Optional[int] = 5000) -> _TDataType:
If max_rows is None, then do not perform any check.
"""
check_data_type(data)

# Nested helper of limit_rows: unconditionally raises MaxRowsError; callers
# are expected to compare the row count against the limit before calling it.
# Takes no arguments -- `max_rows` is read from the enclosing limit_rows scope
# and interpolated into the message. The message suggests enabling the
# "vegafusion" data transformer and links to the large-datasets user guide.
def raise_max_rows_error():
raise MaxRowsError(
"The number of rows in your dataset is greater "
f"than the maximum allowed ({max_rows}).\n\n"
"Try enabling the VegaFusion data transformer which "
"raises this limit by pre-evaluating data\n"
"transformations in Python.\n"
" >> import altair as alt\n"
' >> alt.data_transformers.enable("vegafusion")\n\n'
"Or, see https://altair-viz.github.io/user_guide/large_datasets.html "
"for additional information\n"
"on how to plot large datasets."
)

if hasattr(data, "__geo_interface__"):
if data.__geo_interface__["type"] == "FeatureCollection":
values = data.__geo_interface__["features"]
Expand All @@ -91,20 +106,17 @@ def limit_rows(data: _TDataType, max_rows: Optional[int] = 5000) -> _TDataType:
# as equivalent to TDataType
return data # type: ignore[return-value]
elif hasattr(data, "__dataframe__"):
values = data
pi = import_pyarrow_interchange()
pa_table = pi.from_dataframe(data)
if max_rows is not None and pa_table.num_rows > max_rows:
raise_max_rows_error()
# Return pyarrow Table instead of input since the
# `from_dataframe` call may be expensive
return pa_table

if max_rows is not None and len(values) > max_rows:
raise MaxRowsError(
"The number of rows in your dataset is greater "
f"than the maximum allowed ({max_rows}).\n\n"
"Try enabling the VegaFusion data transformer which "
"raises this limit by pre-evaluating data\n"
"transformations in Python.\n"
" >> import altair as alt\n"
' >> alt.data_transformers.enable("vegafusion")\n\n'
"Or, see https://altair-viz.github.io/user_guide/large_datasets.html "
"for additional information\n"
"on how to plot large datasets."
)
raise_max_rows_error()

return data


Expand Down

0 comments on commit 139c86a

Please sign in to comment.