Skip to content

Commit

Permalink
didn't expect a remote URL
Browse files Browse the repository at this point in the history
  • Loading branch information
madsbk committed Oct 29, 2024
1 parent bf5b778 commit 007c651
Showing 1 changed file with 21 additions and 1 deletion.
22 changes: 21 additions & 1 deletion cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <sys/mman.h>
#include <unistd.h>

#include <regex>
#include <vector>

namespace cudf {
Expand Down Expand Up @@ -389,6 +390,24 @@ class user_datasource_wrapper : public datasource {
datasource* const source; ///< A non-owning pointer to the user-implemented datasource
};

/**
 * @brief Remote file source backed by KvikIO, which handles S3 filepaths seamlessly.
 */
class remote_file_source {
 public:
  /**
   * @brief Is `url` referring to a remote file supported by KvikIO?
   *
   * For now, only S3 urls (urls starting with "s3://") are supported. The scheme
   * prefix is matched case-insensitively, so "S3://" is accepted as well.
   *
   * @param url Path or URL to test
   * @return true if `url` begins with "s3://" (in any letter case), false otherwise
   */
  static bool is_supported_remote_url(std::string const& url)
  {
    // Regular expression to match "s3://" anchored at the start of the string.
    // Compiled once and reused across calls: constructing a std::regex is costly,
    // while searching with a const regex object is cheap. Initialization of a
    // function-local static is thread-safe since C++11.
    static std::regex const pattern{R"(^s3://)", std::regex_constants::icase};
    return std::regex_search(url, pattern);
  }
};

} // namespace

std::unique_ptr<datasource> datasource::create(std::string const& filepath,
Expand All @@ -403,7 +422,8 @@ std::unique_ptr<datasource> datasource::create(std::string const& filepath,

CUDF_FAIL("Invalid LIBCUDF_MMAP_ENABLED value: " + policy);
}();

CUDF_EXPECTS(!remote_file_source::is_supported_remote_url(filepath),
"didn't expect a remote URL");
if (use_memory_mapping) {
return std::make_unique<memory_mapped_source>(filepath.c_str(), offset, max_size_estimate);
} else {
Expand Down

0 comments on commit 007c651

Please sign in to comment.