Skip to content

Commit

Permalink
revisions
Browse files Browse the repository at this point in the history
  • Loading branch information
eeroel committed Nov 24, 2023
1 parent 771b3b8 commit 45afcf1
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 5 deletions.
1 change: 1 addition & 0 deletions cpp/src/arrow/dataset/file_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class ARROW_DS_EXPORT FileSource : public util::EqualityComparable<FileSource> {
Compression::type compression = Compression::UNCOMPRESSED)
: buffer_(std::move(buffer)), compression_(compression) {}


using CustomOpen = std::function<Result<std::shared_ptr<io::RandomAccessFile>>()>;
FileSource(CustomOpen open, int64_t size)
: custom_open_(std::move(open)), custom_size_(size) {}
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/filesystem/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct ARROW_EXPORT FileInfo : public util::EqualityComparable<FileInfo> {

explicit FileInfo(std::string path, FileType type = FileType::Unknown)
: path_(std::move(path)), type_(type) {}

/// The file type
FileType type() const { return type_; }
void set_type(FileType type) { type_ = type; }
Expand Down
8 changes: 3 additions & 5 deletions python/pyarrow/_dataset.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ from pyarrow.includes.libarrow_dataset cimport *
from pyarrow._acero cimport ExecNodeOptions
from pyarrow._compute cimport Expression, _bind
from pyarrow._compute import _forbid_instantiation
from pyarrow._fs cimport FileSystem, FileSelector, FileInfo
from pyarrow._fs cimport FileSystem, FileSelector
from pyarrow._csv cimport (
ConvertOptions, ParseOptions, ReadOptions, WriteOptions)
from pyarrow.util import _is_iterable, _is_path_like, _stringify_path
Expand Down Expand Up @@ -117,10 +117,8 @@ cdef CFileSource _make_file_source(object file, FileSystem filesystem=None, int6
c_path = tobytes(_stringify_path(file))

if file_size >= 0:
c_size = file_size
info = FileInfo(c_path, size=c_size)
c_info = info.unwrap()
c_source = CFileSource(move(c_info), move(c_filesystem))
c_info = CFileInfo(c_path, CFileType_File)
c_info.set_size(file_size)
else:
c_source = CFileSource(move(c_path), move(c_filesystem))
elif hasattr(file, 'read'):
Expand Down
5 changes: 5 additions & 0 deletions python/pyarrow/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,11 @@ def test_make_fragment(multisourcefs):
@pytest.mark.parquet
@pytest.mark.s3
def test_make_fragment_with_size(s3_example_simple):
"""
    Test passing file_size to make_fragment. Only filesystems that implement
    an OpenInputFile overload taking a FileInfo make use of the provided file
    size; S3 does, which is why it is used here.
"""
table, path, fs, uri, host, port, access_key, secret_key = s3_example_simple

file_format = ds.ParquetFileFormat()
Expand Down

0 comments on commit 45afcf1

Please sign in to comment.