
Commit

remove redundant constructor
eeroel committed Nov 15, 2023
1 parent c5b5401 commit a4b0f35
Showing 5 changed files with 20 additions and 15 deletions.
5 changes: 0 additions & 5 deletions cpp/src/arrow/dataset/file_base.h
@@ -54,11 +54,6 @@ class ARROW_DS_EXPORT FileSource : public util::EqualityComparable<FileSource> {
       : file_info_(std::move(path)),
         filesystem_(std::move(filesystem)),
         compression_(compression) {}
-  FileSource(std::string path, int64_t size, std::shared_ptr<fs::FileSystem> filesystem,
-             Compression::type compression = Compression::UNCOMPRESSED)
-      : file_info_(std::move(path), std::move(size)),
-        filesystem_(std::move(filesystem)),
-        compression_(compression) {}
   FileSource(fs::FileInfo info, std::shared_ptr<fs::FileSystem> filesystem,
              Compression::type compression = Compression::UNCOMPRESSED)
       : file_info_(std::move(info)),
3 changes: 0 additions & 3 deletions cpp/src/arrow/filesystem/filesystem.h
@@ -60,9 +60,6 @@ struct ARROW_EXPORT FileInfo : public util::EqualityComparable<FileInfo> {

   explicit FileInfo(std::string path, FileType type = FileType::Unknown)
       : path_(std::move(path)), type_(type) {}
-  explicit FileInfo(std::string path, int64_t size, FileType type = FileType::Unknown)
-      : path_(std::move(path)), type_(type), size_(size) {}
-
   /// The file type
   FileType type() const { return type_; }
   void set_type(FileType type) { type_ = type; }
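Note: with both overloads gone, a known file size travels inside fs::FileInfo rather than as an extra constructor argument, and FileSource simply consumes the FileInfo. The same shape is visible from Python, where pyarrow.fs.FileInfo takes a keyword-only size; a minimal sketch (path and size are made up):

    import pyarrow.fs as fs

    # A FileInfo bundles path, type, and an optional known size, which is
    # what makes a separate (path, size) constructor overload redundant.
    info = fs.FileInfo("bucket/data.parquet", type=fs.FileType.File, size=4096)
    assert info.path == "bucket/data.parquet"
    assert info.size == 4096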
3 changes: 1 addition & 2 deletions python/pyarrow/_dataset.pxd
@@ -22,12 +22,11 @@
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow_dataset cimport *
 from pyarrow.lib cimport *
-from pyarrow._fs cimport FileSystem
+from pyarrow._fs cimport FileSystem, FileInfo
 
 
 cdef CFileSource _make_file_source(object file, FileSystem filesystem=*, int64_t file_size=*)
 
 
 cdef class DatasetFactory(_Weakrefable):
 
     cdef:
7 changes: 5 additions & 2 deletions python/pyarrow/_dataset.pyx
@@ -32,7 +32,7 @@ from pyarrow.includes.libarrow_dataset cimport *
 from pyarrow._acero cimport ExecNodeOptions
 from pyarrow._compute cimport Expression, _bind
 from pyarrow._compute import _forbid_instantiation
-from pyarrow._fs cimport FileSystem, FileSelector
+from pyarrow._fs cimport FileSystem, FileSelector, FileInfo
 from pyarrow._csv cimport (
     ConvertOptions, ParseOptions, ReadOptions, WriteOptions)
 from pyarrow.util import _is_iterable, _is_path_like, _stringify_path
@@ -101,6 +101,7 @@ cdef CFileSource _make_file_source(object file, FileSystem filesystem=None, int64_t file_size=-1):
     cdef:
         CFileSource c_source
         shared_ptr[CFileSystem] c_filesystem
+        CFileInfo c_info
         c_string c_path
         shared_ptr[CRandomAccessFile] c_file
         shared_ptr[CBuffer] c_buffer
@@ -117,7 +118,9 @@ cdef CFileSource _make_file_source(object file, FileSystem filesystem=None, int64_t file_size=-1):

         if file_size >= 0:
             c_size = file_size
-            c_source = CFileSource(move(c_path), move(c_size), move(c_filesystem))
+            info = FileInfo(c_path, size=c_size)
+            c_info = info.unwrap()
+            c_source = CFileSource(move(c_info), move(c_filesystem))
         else:
             c_source = CFileSource(move(c_path), move(c_filesystem))
     elif hasattr(file, 'read'):
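With this change, a caller-supplied file_size reaches C++ as part of a FileInfo, so the scanner never has to stat the file itself. A usage sketch of the Python entry point this serves (bucket, region, and size are illustrative; the file_size keyword is the one exercised by the test below and requires a build that includes this change):

    import pyarrow.dataset as ds
    from pyarrow import fs

    # Hypothetical object-store setup: passing file_size lets fragment
    # opening skip a GetFileInfo (HEAD) request against S3.
    s3 = fs.S3FileSystem(region="us-east-1")
    fmt = ds.ParquetFileFormat()
    fragment = fmt.make_fragment("my-bucket/data.parquet", s3, file_size=4096)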
17 changes: 14 additions & 3 deletions python/pyarrow/tests/test_dataset.py
@@ -991,15 +991,25 @@ def test_make_fragment_with_size(s3_example_simple):

     fragments = [file_format.make_fragment(path, fs)
                  for path in paths]
 
     dataset = ds.FileSystemDataset(
         fragments, format=file_format, schema=table.schema, filesystem=fs
     )
 
     tbl = dataset.to_table()
     assert tbl.equals(table)
 
-    sizes_toosmall = [1]
+    # true sizes -> works
+    sizes_true = [dataset.filesystem.get_file_info(x).size for x in dataset.files]
+    fragments_with_size = [file_format.make_fragment(path, fs, file_size=size)
+                           for path, size in zip(paths, sizes_true)]
+    dataset_with_size = ds.FileSystemDataset(
+        fragments_with_size, format=file_format, schema=table.schema, filesystem=fs
+    )
+    tbl = dataset_with_size.to_table()
+    assert tbl.equals(table)
+
+    # too small sizes -> error
+    sizes_toosmall = [1 for path in paths]
     fragments_with_size = [file_format.make_fragment(path, fs, file_size=size)
                            for path, size in zip(paths, sizes_toosmall)]

@@ -1010,7 +1020,8 @@ def test_make_fragment_with_size(s3_example_simple):
     with pytest.raises(pyarrow.lib.ArrowInvalid, match='Parquet file size is 1 bytes'):
         table = dataset_with_size.to_table()
 
-    sizes_toolarge = [1000000]
+    # too large sizes -> error
+    sizes_toolarge = [1000000 for path in paths]
     fragments_with_size = [file_format.make_fragment(path, fs, file_size=size)
                            for path, size in zip(paths, sizes_toolarge)]

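The test encodes the contract: a supplied size must match the real file, and a mismatch surfaces only when the fragment is actually read. A self-contained local sketch of the happy path, under the same assumptions (file path is illustrative; file_size requires a build with this change):

    import pyarrow as pa
    import pyarrow.dataset as ds
    import pyarrow.parquet as pq
    from pyarrow import fs

    # Write a small Parquet file, then build a fragment with its exact size;
    # a wrong size would only fail later, inside to_table().
    pq.write_table(pa.table({"x": [1, 2, 3]}), "/tmp/demo.parquet")
    local = fs.LocalFileSystem()
    size = local.get_file_info("/tmp/demo.parquet").size
    fragment = ds.ParquetFileFormat().make_fragment(
        "/tmp/demo.parquet", local, file_size=size)
    assert fragment.to_table().num_rows == 3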
