Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-39069: [C++][FS][Azure] Use the generic filesystem tests #40567

Merged
merged 10 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 92 additions & 25 deletions cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1591,7 +1591,9 @@ class AzureFileSystem::Impl {
if (info.type() == FileType::NotFound) {
return PathNotFound(location);
}
DCHECK_EQ(info.type(), FileType::Directory);
if (info.type() != FileType::Directory) {
return NotADir(location);
}
return Status::OK();
}

Expand Down Expand Up @@ -1818,37 +1820,85 @@ class AzureFileSystem::Impl {
const AzureLocation& location, bool recursive) {
DCHECK(!location.container.empty());
DCHECK(!location.path.empty());
// Non-recursive CreateDir calls require the parent directory to exist.
if (!recursive) {
if (recursive) {
// Recursive CreateDir calls require that all path segments be
// either a directory or not found.

// Check each path segment is a directory or not
// found. Nonexistent segments are collected to
// nonexistent_locations. We'll create directories for
// nonexistent segments later.
std::vector<AzureLocation> nonexistent_locations;
for (auto prefix = location; !prefix.path.empty(); prefix = prefix.parent()) {
ARROW_ASSIGN_OR_RAISE(auto info, GetFileInfo(container_client, prefix));
if (info.type() == FileType::File) {
return NotADir(prefix);
}
if (info.type() == FileType::NotFound) {
nonexistent_locations.push_back(prefix);
}
}
// Ensure container exists
ARROW_ASSIGN_OR_RAISE(auto container,
AzureLocation::FromString(location.container));
ARROW_ASSIGN_OR_RAISE(auto container_info,
GetContainerPropsAsFileInfo(container, container_client));
if (container_info.type() == FileType::NotFound) {
try {
container_client.CreateIfNotExists();
} catch (const Storage::StorageException& exception) {
return ExceptionToStatus(exception, "Failed to create directory '",
location.all, "': ", container_client.GetUrl());
}
}
// Create nonexistent directories from shorter to longer:
//
// Example:
//
// * location: /container/a/b/c/d/
// * Nonexistent path segments:
// * /container/a/
// * /container/a/c/
// * /container/a/c/d/
// * target_locations:
// 1. /container/a/c/d/
// 2. /container/a/c/
// 3. /container/a/
//
// Create order:
// 1. /container/a/
// 2. /container/a/c/
// 3. /container/a/c/d/
for (size_t i = nonexistent_locations.size(); i > 0; --i) {
const auto& nonexistent_location = nonexistent_locations[i - 1];
try {
create_if_not_exists(container_client, nonexistent_location);
} catch (const Storage::StorageException& exception) {
return ExceptionToStatus(exception, "Failed to create directory '",
location.all, "': ", container_client.GetUrl());
Comment on lines +1877 to +1878
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After the first iteration of this loop, an exception shouldn't report a failure of the entire operation: the full path creates all the prefixes (they are "implied directories"). All the other functions handle implied directories, so it should be OK to rely on them.

}
}
return Status::OK();
} else {
// Non-recursive CreateDir calls require the parent directory to exist.
auto parent = location.parent();
if (!parent.path.empty()) {
RETURN_NOT_OK(CheckDirExists(container_client, parent));
}
// If the parent location is just the container, we don't need to check if it
// exists because the operation we perform below will fail if the container
// doesn't exist and we can handle that error according to the recursive flag.
}
try {
create_if_not_exists(container_client, location);
return Status::OK();
} catch (const Storage::StorageException& exception) {
if (IsContainerNotFound(exception)) {
try {
if (recursive) {
container_client.CreateIfNotExists();
create_if_not_exists(container_client, location);
return Status::OK();
} else {
auto parent = location.parent();
return PathNotFound(parent);
}
} catch (const Storage::StorageException& second_exception) {
return ExceptionToStatus(second_exception, "Failed to create directory '",
location.all, "': ", container_client.GetUrl());
try {
create_if_not_exists(container_client, location);
return Status::OK();
} catch (const Storage::StorageException& exception) {
if (IsContainerNotFound(exception)) {
auto parent = location.parent();
return PathNotFound(parent);
}
return ExceptionToStatus(exception, "Failed to create directory '", location.all,
"': ", container_client.GetUrl());
}
return ExceptionToStatus(exception, "Failed to create directory '", location.all,
"': ", container_client.GetUrl());
}
}

Expand Down Expand Up @@ -2016,8 +2066,15 @@ class AzureFileSystem::Impl {
bool found_dir_marker_blob = false;
try {
auto list_response = container_client.ListBlobs(options);
if (require_dir_to_exist && list_response.Blobs.empty()) {
return PathNotFound(location);
if (list_response.Blobs.empty()) {
if (require_dir_to_exist) {
return PathNotFound(location);
} else {
ARROW_ASSIGN_OR_RAISE(auto info, GetFileInfo(container_client, location));
if (info.type() == FileType::File) {
return NotADir(location);
}
}
}
for (; list_response.HasPage(); list_response.MoveToNextPage()) {
if (list_response.Blobs.empty()) {
Expand Down Expand Up @@ -2732,6 +2789,16 @@ class AzureFileSystem::Impl {
}
auto dest_blob_client = GetBlobClient(dest.container, dest.path);
auto src_url = GetBlobClient(src.container, src.path).GetUrl();
if (!dest.path.empty()) {
auto dest_parent = dest.parent();
if (!dest_parent.path.empty()) {
auto dest_container_client = GetBlobContainerClient(dest_parent.container);
ARROW_ASSIGN_OR_RAISE(auto info, GetFileInfo(dest_container_client, dest_parent));
if (info.type() == FileType::File) {
return NotADir(dest_parent);
}
}
}
try {
dest_blob_client.CopyFromUri(src_url);
} catch (const Storage::StorageException& exception) {
Expand Down
Loading
Loading