[BUG]: use recordbatch instead of table for df.to_arrow_iter (#2724)
see #2679
universalmind303 authored Aug 26, 2024
1 parent 20ffed4 commit 1d9dc67
Showing 2 changed files with 5 additions and 5 deletions.
8 changes: 4 additions & 4 deletions daft/dataframe/dataframe.py
@@ -286,16 +286,16 @@ def iter_rows(self, results_buffer_size: Optional[int] = NUM_CPUS) -> Iterator[D
             yield row
 
     @DataframePublicAPI
-    def to_arrow_iter(self, results_buffer_size: Optional[int] = 1) -> Iterator["pyarrow.Table"]:
+    def to_arrow_iter(self, results_buffer_size: Optional[int] = 1) -> Iterator["pyarrow.RecordBatch"]:
         """
-        Return an iterator of pyarrow tables for this dataframe.
+        Return an iterator of pyarrow recordbatches for this dataframe.
         """
         if results_buffer_size is not None and not results_buffer_size > 0:
             raise ValueError(f"Provided `results_buffer_size` value must be > 0, received: {results_buffer_size}")
         if self._result is not None:
             # If the dataframe has already finished executing,
             # use the precomputed results.
-            yield self.to_arrow()
+            yield from self.to_arrow().to_batches()
 
         else:
             # Execute the dataframe in a streaming fashion.
@@ -304,7 +304,7 @@ def to_arrow_iter(self, results_buffer_size: Optional[int] = 1) -> Iterator["pya
 
             # Iterate through partitions.
             for partition in partitions_iter:
-                yield partition.to_arrow()
+                yield from partition.to_arrow().to_batches()
 
     @DataframePublicAPI
     def iter_partitions(
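With this change, each item yielded by `to_arrow_iter()` is a `pyarrow.RecordBatch` rather than a `pyarrow.Table`. A minimal sketch of consuming the new iterator (the example data and the per-batch sum are illustrative only); callers that previously relied on receiving `Table`s can reassemble one with `pyarrow.Table.from_batches`:

import daft
import pyarrow as pa
import pyarrow.compute as pc

# Illustrative data; any DataFrame works the same way.
df = daft.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]})

# Each yielded item is now a pyarrow.RecordBatch, so results can be
# processed incrementally without materializing a full Table first.
total = 0
for batch in df.to_arrow_iter():
    assert isinstance(batch, pa.RecordBatch)
    total += pc.sum(batch.column("a")).as_py()

print(total)  # 6

# Callers that previously expected Tables can rebuild one from the batches.
table = pa.Table.from_batches(df.to_arrow_iter())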
2 changes: 1 addition & 1 deletion tests/table/test_from_py.py
@@ -669,4 +669,4 @@ def __iter__(self):
 def test_to_arrow_iterator() -> None:
     df = daft.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]})
     it = df.to_arrow_iter()
-    assert isinstance(next(it), pa.Table)
+    assert isinstance(next(it), pa.RecordBatch)
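The updated test only checks the type of the first yielded item. Not part of this commit, but a hedged sketch of how the streamed contents could additionally be verified to round-trip back to the original data:

import daft
import pyarrow as pa


def test_to_arrow_iterator_contents() -> None:
    # Illustrative extension (not in this commit): the streamed
    # RecordBatches should reassemble into the original columns.
    data = {"a": [1, 2, 3], "b": [4, 5, 6]}
    df = daft.from_pydict(data)
    batches = list(df.to_arrow_iter())
    assert all(isinstance(b, pa.RecordBatch) for b in batches)
    assert pa.Table.from_batches(batches).to_pydict() == data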
