Skip to content

Commit

Permalink
apacheGH-41922: [CI][C++] Update Minio version
Browse files Browse the repository at this point in the history
We were using a rather old version of Minio for CI tests.

Update the Minio version and ensure the S3FileSystem implementation passes all the tests with it.
  • Loading branch information
pitrou committed Sep 25, 2024
1 parent c557fe5 commit 8c63110
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ jobs:
mkdir -p /usr/local/bin
wget \
--output-document /usr/local/bin/minio.exe \
https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z
https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z
chmod +x /usr/local/bin/minio.exe
- name: Set up Python
uses: actions/[email protected]
Expand Down
2 changes: 1 addition & 1 deletion ci/appveyor-cpp-setup.bat
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ set CXX=cl.exe
@rem Download Minio somewhere on PATH, for unit tests
@rem
if "%ARROW_S3%" == "ON" (
appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z -FileName C:\Windows\Minio.exe || exit /B
appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z -FileName C:\Windows\Minio.exe || exit /B
)


Expand Down
2 changes: 1 addition & 1 deletion ci/docker/python-wheel-windows-test-vs2019.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ RUN setx path "%path%;C:\Program Files\Git\usr\bin"
# 2. Install Minio for S3 testing.
RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \
rm -rf Python* && \
curl https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z \
curl https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z \
--output "C:\Windows\Minio.exe"

# Install the GCS testbench using a well-known Python version.
Expand Down
4 changes: 2 additions & 2 deletions ci/scripts/install_minio.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ if [ "${version}" != "latest" ]; then
fi

# Use specific versions for minio server and client to avoid CI failures on new releases.
minio_version="minio.RELEASE.2022-05-26T05-48-41Z"
mc_version="mc.RELEASE.2022-05-09T04-08-26Z"
minio_version="minio.RELEASE.2024-09-13T20-26-02Z"
mc_version="mc.RELEASE.2024-09-16T17-43-14Z"

download()
{
Expand Down
58 changes: 42 additions & 16 deletions cpp/src/arrow/filesystem/s3fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2218,11 +2218,28 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
return std::string(FromAwsString(builder_.config().region));
}

// TODO: for every returned error, call GetOrSetBackend()?

template <typename Error>
void SaveBackend(const Aws::Client::AWSError<Error>& error) {
S3Backend GetOrSetBackend(const Aws::Client::AWSError<Error>& error) {
if (!backend_ || *backend_ == S3Backend::Other) {
backend_ = DetectS3Backend(error);
}
DCHECK(backend_.has_value());
return *backend_;
}

Result<S3Backend> GetBackend() {
if (!backend_) {
ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock());

S3Model::HeadBucketRequest req;
req.SetBucket("$extremelyunlikelytoexist$");
auto outcome = client_lock.Move()->HeadBucket(req);
DCHECK(!outcome.IsSuccess());
return GetOrSetBackend(outcome.GetError());
}
return *backend_;
}

// Tests to see if a bucket exists
Expand Down Expand Up @@ -2345,12 +2362,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp

if (previous_outcome) {
// Fetch the backend from the previous error
DCHECK(!previous_outcome->IsSuccess());
if (!backend_) {
SaveBackend(previous_outcome->GetError());
DCHECK(backend_);
}
if (backend_ != S3Backend::Minio) {
if (GetOrSetBackend(previous_outcome->GetError()) != S3Backend::Minio) {
// HEAD already returned a 404, nothing more to do
return false;
}
Expand All @@ -2373,9 +2385,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
return true;
}
if (!backend_) {
SaveBackend(outcome.GetError());
DCHECK(backend_);
if (*backend_ == S3Backend::Minio) {
if (GetOrSetBackend(outcome.GetError()) == S3Backend::Minio) {
// Try again with separator-terminated key (see above)
return IsEmptyDirectory(bucket, key);
}
Expand Down Expand Up @@ -3053,6 +3063,7 @@ Result<FileInfo> S3FileSystem::GetFileInfo(const std::string& s) {

auto outcome = client_lock.Move()->HeadBucket(req);
if (!outcome.IsSuccess()) {
impl_->GetOrSetBackend(outcome.GetError());
if (!IsNotFound(outcome.GetError())) {
const auto msg = "When getting information for bucket '" + path.bucket + "': ";
return ErrorToStatus(msg, "HeadBucket", outcome.GetError(),
Expand All @@ -3077,6 +3088,7 @@ Result<FileInfo> S3FileSystem::GetFileInfo(const std::string& s) {
FileObjectToInfo(path.key, outcome.GetResult(), &info);
return info;
}
impl_->GetOrSetBackend(outcome.GetError());
if (!IsNotFound(outcome.GetError())) {
const auto msg = "When getting information for key '" + path.key + "' in bucket '" +
path.bucket + "': ";
Expand Down Expand Up @@ -3124,8 +3136,9 @@ Status S3FileSystem::CreateDir(const std::string& s, bool recursive) {
return impl_->CreateBucket(path.bucket);
}

ARROW_ASSIGN_OR_RAISE(auto backend, impl_->GetBackend());

FileInfo file_info;
// Create object
if (recursive) {
// Ensure bucket exists
ARROW_ASSIGN_OR_RAISE(bool bucket_exists, impl_->BucketExists(path.bucket));
Expand All @@ -3135,7 +3148,8 @@ Status S3FileSystem::CreateDir(const std::string& s, bool recursive) {

auto key_i = path.key_parts.begin();
std::string parent_key{};
if (options().check_directory_existence_before_creation) {
if (options().check_directory_existence_before_creation ||
backend == S3Backend::Minio) {
// Walk up the directory first to find the first existing parent
for (const auto& part : path.key_parts) {
parent_key += part;
Expand All @@ -3146,6 +3160,10 @@ Status S3FileSystem::CreateDir(const std::string& s, bool recursive) {
this->GetFileInfo(path.bucket + kSep + parent_key));
if (file_info.type() != FileType::NotFound) {
// Found!
if (file_info.type() != FileType::Directory) {
return Status::IOError("Cannot create directory '", file_info.path(),
"': a non-directory entry already exists");
}
break;
} else {
// remove the kSep and the part
Expand Down Expand Up @@ -3178,15 +3196,23 @@ Status S3FileSystem::CreateDir(const std::string& s, bool recursive) {
}
}

// Check if the directory exists already
if (options().check_directory_existence_before_creation) {
// Non-recursive operation

// Check if the entry exists already
if (options().check_directory_existence_before_creation ||
backend == S3Backend::Minio) {
ARROW_ASSIGN_OR_RAISE(file_info, this->GetFileInfo(path.full_path));
if (file_info.type() != FileType::NotFound) {
if (file_info.type() != FileType::Directory) {
return Status::IOError("Cannot create directory '", file_info.path(),
"': a non-directory entry already exists");
}
return Status::OK();
}
}
// XXX Should we check that no non-directory entry exists?
// Minio does it for us, not sure about other S3 implementations.
// NOTE: this won't check that no non-directory entry exists with the same name
// (unlike when `check_directory_existence_before_creation` is enabled).
// Old versions of Minio do it for us, newer versions don't.
return impl_->CreateEmptyDir(path.bucket, path.key);
}

Expand Down
15 changes: 13 additions & 2 deletions cpp/src/arrow/filesystem/s3fs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -995,7 +995,7 @@ TEST_F(TestS3FS, CreateDir) {
ASSERT_OK(fs_->CreateDir("bucket/newdir", /*recursive=*/false));
AssertFileInfo(fs_.get(), "bucket/newdir", FileType::Directory);

// By default CreateDir uses recursvie mode, make it explictly to be false
// Non-recursive, but parent does not exist
ASSERT_RAISES(IOError,
fs_->CreateDir("bucket/newdir/newsub/newsubsub", /*recursive=*/false));

Expand All @@ -1005,7 +1005,8 @@ TEST_F(TestS3FS, CreateDir) {
AssertFileInfo(fs_.get(), "bucket/newdir/newsub/newsubsub", FileType::Directory);

// Existing "file", should fail
ASSERT_RAISES(IOError, fs_->CreateDir("bucket/somefile"));
ASSERT_RAISES(IOError, fs_->CreateDir("bucket/somefile", /*recursive=*/false));
ASSERT_RAISES(IOError, fs_->CreateDir("bucket/somefile", /*recursive=*/true));

// URI
ASSERT_RAISES(Invalid, fs_->CreateDir("s3:bucket/newdir2"));
Expand All @@ -1017,6 +1018,11 @@ TEST_F(TestS3FS, CreateDir) {
// check existing before creation
options_.check_directory_existence_before_creation = true;
MakeFileSystem();

// Existing "file", should fail again
ASSERT_RAISES(IOError, fs_->CreateDir("bucket/somefile", /*recursive=*/false));
ASSERT_RAISES(IOError, fs_->CreateDir("bucket/somefile", /*recursive=*/true));

// New "directory" again
AssertFileInfo(fs_.get(), "bucket/checknewdir", FileType::NotFound);
ASSERT_OK(fs_->CreateDir("bucket/checknewdir"));
Expand All @@ -1031,6 +1037,7 @@ TEST_F(TestS3FS, CreateDir) {
AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub/newsubsub", FileType::Directory);
AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub/newsubsub/newsubsub",
FileType::NotFound);

// Try creation with the same name
ASSERT_OK(fs_->CreateDir("bucket/checknewdir/newsub/newsubsub/newsubsub/",
/*recursive=*/true));
Expand Down Expand Up @@ -1532,6 +1539,10 @@ class TestS3FSGeneric : public S3TestMixin, public GenericFileSystemTest {

bool have_implicit_directories() const override { return true; }
bool allow_write_file_over_dir() const override { return true; }
bool allow_write_implicit_dir_over_file() const override {
// Recent Minio versions allow this
return true;
}
bool allow_move_dir() const override { return false; }
bool allow_append_to_file() const override { return false; }
bool have_directory_mtimes() const override { return false; }
Expand Down
10 changes: 7 additions & 3 deletions cpp/src/arrow/filesystem/test_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,9 @@ void GenericFileSystemTest::TestMoveFile(FileSystem* fs) {
}
// Parent destination is not a directory
CreateFile(fs, "xxx", "");
ASSERT_RAISES(IOError, fs->Move("AB/pqr", "xxx/mno"));
if (!allow_write_implicit_dir_over_file()) {
ASSERT_RAISES(IOError, fs->Move("AB/pqr", "xxx/mno"));
}
if (!allow_write_file_over_dir()) {
// Destination is a directory
ASSERT_RAISES(IOError, fs->Move("AB/pqr", "EF"));
Expand Down Expand Up @@ -568,8 +570,10 @@ void GenericFileSystemTest::TestCopyFile(FileSystem* fs) {
// Parent destination doesn't exist
ASSERT_RAISES(IOError, fs->CopyFile("AB/abc", "XX/mno"));
}
// Parent destination is not a directory
ASSERT_RAISES(IOError, fs->CopyFile("AB/abc", "def/mno"));
// Parent destination is not a directory ("def" is a file)
if (!allow_write_implicit_dir_over_file()) {
ASSERT_RAISES(IOError, fs->CopyFile("AB/abc", "def/mno"));
}
AssertAllDirs(fs, all_dirs);
AssertAllFiles(fs, {"AB/abc", "EF/ghi", "def"});
}
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/arrow/filesystem/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest {
virtual bool have_implicit_directories() const { return false; }
// - Whether the filesystem may allow writing a file "over" a directory
virtual bool allow_write_file_over_dir() const { return false; }
// - Whether the filesystem may allow writing a directory "over" a file,
// for example copying file "A" to "B/C" while "B" exists and is a file.
virtual bool allow_write_implicit_dir_over_file() const { return false; }
// - Whether the filesystem allows reading a directory
virtual bool allow_read_dir_as_file() const { return false; }
// - Whether the filesystem allows moving a file
Expand Down

0 comments on commit 8c63110

Please sign in to comment.