Skip to content

Commit

Permalink
rename size -> file_size
Browse files (browse the repository at this point in the history)
  • Loading branch information
eeroel committed Nov 15, 2023
1 parent 3754009 commit c5b5401
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 17 deletions.
2 changes: 1 addition & 1 deletion python/pyarrow/_dataset.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ from pyarrow.lib cimport *
from pyarrow._fs cimport FileSystem


cdef CFileSource _make_file_source(object file, FileSystem filesystem=*, int64_t size=*)
cdef CFileSource _make_file_source(object file, FileSystem filesystem=*, int64_t file_size=*)


cdef class DatasetFactory(_Weakrefable):
Expand Down
16 changes: 8 additions & 8 deletions python/pyarrow/_dataset.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def _get_parquet_symbol(name):
return _dataset_pq and getattr(_dataset_pq, name)


cdef CFileSource _make_file_source(object file, FileSystem filesystem=None, int64_t size=-1):
cdef CFileSource _make_file_source(object file, FileSystem filesystem=None, int64_t file_size=-1):

cdef:
CFileSource c_source
Expand All @@ -115,8 +115,8 @@ cdef CFileSource _make_file_source(object file, FileSystem filesystem=None, int6
c_filesystem = filesystem.unwrap()
c_path = tobytes(_stringify_path(file))

if size >= 0:
c_size = size
if file_size >= 0:
c_size = file_size
c_source = CFileSource(move(c_path), move(c_size), move(c_filesystem))
else:
c_source = CFileSource(move(c_path), move(c_filesystem))
Expand Down Expand Up @@ -1242,7 +1242,7 @@ cdef class FileFormat(_Weakrefable):

def make_fragment(self, file, filesystem=None,
Expression partition_expression=None,
*, size=None):
*, file_size=None):
"""
Make a FileFragment from a given file.
Expand All @@ -1256,7 +1256,7 @@ cdef class FileFormat(_Weakrefable):
partition_expression : Expression, optional
An expression that is guaranteed true for all rows in the fragment. Allows
fragment to be potentially skipped while scanning with a filter.
size : int, optional
file_size : int, optional
The size of the file in bytes. Can improve performance with high-latency filesystems
when file size needs to be known before reading.
Expand All @@ -1270,9 +1270,9 @@ cdef class FileFormat(_Weakrefable):
int64_t c_size = -1
if partition_expression is None:
partition_expression = _true
if size is not None:
c_size = size
c_source = _make_file_source(file, filesystem=filesystem, size=c_size)
if file_size is not None:
c_size = file_size
c_source = _make_file_source(file, filesystem=filesystem, file_size=c_size)
c_fragment = <shared_ptr[CFragment]> GetResultValue(
self.format.MakeFragment(move(c_source),
partition_expression.unwrap(),
Expand Down
12 changes: 6 additions & 6 deletions python/pyarrow/_dataset_parquet.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ cdef class ParquetFileFormat(FileFormat):
return f"<ParquetFileFormat read_options={self.read_options}>"

def make_fragment(self, file, filesystem=None,
Expression partition_expression=None, row_groups=None, *, size=None):
Expression partition_expression=None, row_groups=None, *, file_size=None):
"""
Make a FileFragment from a given file.
Expand All @@ -251,7 +251,7 @@ cdef class ParquetFileFormat(FileFormat):
fragment to be potentially skipped while scanning with a filter.
row_groups : Iterable, optional
The indices of the row groups to include
size : int, optional
file_size : int, optional
The size of the file in bytes. Can improve performance with high-latency filesystems
when file size needs to be known before reading.
Expand All @@ -266,13 +266,13 @@ cdef class ParquetFileFormat(FileFormat):
int64_t c_size = -1
if partition_expression is None:
partition_expression = _true
if size is not None:
c_size = size
if file_size is not None:
c_size = file_size
if row_groups is None:
return super().make_fragment(file, filesystem,
partition_expression, size=size)
partition_expression, file_size=file_size)

c_source = _make_file_source(file, filesystem, size=c_size)
c_source = _make_file_source(file, filesystem, file_size=c_size)
c_row_groups = [<int> row_group for row_group in set(row_groups)]

c_fragment = <shared_ptr[CFragment]> GetResultValue(
Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,7 +1000,7 @@ def test_make_fragment_with_size(s3_example_simple):
assert tbl.equals(table)

sizes_toosmall = [1]
fragments_with_size = [file_format.make_fragment(path, fs, size=size)
fragments_with_size = [file_format.make_fragment(path, fs, file_size=size)
for path, size in zip(paths, sizes_toosmall)]

dataset_with_size = ds.FileSystemDataset(
Expand All @@ -1011,7 +1011,7 @@ def test_make_fragment_with_size(s3_example_simple):
table = dataset_with_size.to_table()

sizes_toolarge = [1000000]
fragments_with_size = [file_format.make_fragment(path, fs, size=size)
fragments_with_size = [file_format.make_fragment(path, fs, file_size=size)
for path, size in zip(paths, sizes_toolarge)]

dataset_with_size = ds.FileSystemDataset(
Expand Down

0 comments on commit c5b5401

Please sign in to comment.