Skip to content

Commit

Permalink
Add support for WASBS to native Azure file system implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
mrcnc committed Sep 24, 2024
1 parent b57552d commit 74bab73
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

class AzureLocation
{
private static final String INVALID_ABFS_LOCATION_MESSAGE = "Invalid Azure ABFS location. Expected form is 'abfs://[<containerName>@]<accountName>.dfs.<endpoint>/<filePath>': %s";
private static final String INVALID_WASB_LOCATION_MESSAGE = "Invalid Azure WASB location. Expected form is 'wasb://[<containerName>@]<accountName>.blob.<endpoint>/<filePath>': %s";
private static final String INVALID_ABFS_LOCATION_MESSAGE = "Invalid Azure ABFS location. Expected form is 'abfs[s]://[<containerName>@]<accountName>.dfs.<endpoint>/<filePath>': %s";
private static final String INVALID_WASB_LOCATION_MESSAGE = "Invalid Azure WASB location. Expected form is 'wasb[s]://[<containerName>@]<accountName>.blob.<endpoint>/<filePath>': %s";

// https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules
private static final CharMatcher CONTAINER_VALID_CHARACTERS = CharMatcher.inRange('a', 'z').or(CharMatcher.inRange('0', '9')).or(CharMatcher.is('-'));
Expand All @@ -40,18 +40,18 @@ class AzureLocation
* <p>
* Locations use the
* <a href="https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri">ABFS URI</a> syntax:
* <pre>{@code abfs://<container-name>@<storage-account-name>.dfs.<endpoint>/<blob_path>}</pre>
* <pre>{@code abfs[s]://<container-name>@<storage-account-name>.dfs.<endpoint>/<blob_path>}</pre>
*/
public AzureLocation(Location location)
{
this.location = requireNonNull(location, "location is null");
// abfss and wasb are also supported but not documented
// wasb and wasbs are also supported but not documented
scheme = location.scheme().orElseThrow(() -> new IllegalArgumentException(String.format(INVALID_ABFS_LOCATION_MESSAGE, location)));
String invalidLocationMessage;
if ("abfs".equals(scheme) || "abfss".equals(scheme)) {
invalidLocationMessage = INVALID_ABFS_LOCATION_MESSAGE;
}
else if ("wasb".equals(scheme)) {
else if ("wasb".equals(scheme) || "wasbs".equals(scheme)) {
invalidLocationMessage = INVALID_WASB_LOCATION_MESSAGE;
}
else {
Expand Down Expand Up @@ -86,7 +86,7 @@ else if ("wasb".equals(scheme)) {
this.location);
this.account = host.substring(0, accountSplit);

// abfs[s] host must contain ".dfs.", and wasb host must contain ".blob." before endpoint
// abfs[s] host must contain ".dfs.", and wasb[s] host must contain ".blob." before endpoint
if (scheme.equals("abfs") || scheme.equals("abfss")) {
checkArgument(host.substring(accountSplit).startsWith(".dfs."), invalidLocationMessage, location);
// endpoint does not include dfs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ void test()
assertValid("abfs://container@account.dfs.core.windows.net/some/path/file", "account", "container", "some/path/file", "abfs", "core.windows.net");
assertValid("abfss://container@account.dfs.core.windows.net/some/path/file", "account", "container", "some/path/file", "abfss", "core.windows.net");
assertValid("wasb://container@account.blob.core.windows.net/some/path/file", "account", "container", "some/path/file", "wasb", "core.windows.net");
assertValid("wasbs://container@account.blob.core.windows.net/some/path/file", "account", "container", "some/path/file", "wasbs", "core.windows.net");

assertValid("abfs://container-stuff@account.dfs.core.windows.net/some/path/file", "account", "container-stuff", "some/path/file", "abfs", "core.windows.net");
assertValid("abfs://container2@account.dfs.core.windows.net/some/path/file", "account", "container2", "some/path/file", "abfs", "core.windows.net");
Expand All @@ -41,16 +42,18 @@ void test()
assertValid("abfs://container@account.dfs.core.usgovcloudapi.net/some/path/file", "account", "container", "some/path/file", "abfs", "core.usgovcloudapi.net");
assertValid("abfss://container@account.dfs.core.usgovcloudapi.net/some/path/file", "account", "container", "some/path/file", "abfss", "core.usgovcloudapi.net");
assertValid("wasb://container@account.blob.core.usgovcloudapi.net/some/path/file", "account", "container", "some/path/file", "wasb", "core.usgovcloudapi.net");
assertValid("wasbs://container@account.blob.core.usgovcloudapi.net/some/path/file", "account", "container", "some/path/file", "wasbs", "core.usgovcloudapi.net");

// abfs[s] host must contain ".dfs.", and wasb host must contain ".blob." before endpoint
// abfs[s] host must contain ".dfs.", and wasb[s] host must contain ".blob." before endpoint
assertInvalid("abfs://[email protected]/some/path/file");
assertInvalid("abfss://[email protected]/some/path/file");
assertInvalid("wasb://[email protected]/some/path/file");
assertInvalid("abfs://[email protected]/some/path/file");
assertInvalid("abfss://[email protected]/some/path/file");
assertInvalid("wasb://[email protected]/some/path/file");
assertInvalid("wasbs://[email protected]/some/path/file");

// only abfs, abfss, and wasb schemes allowed
// only abfs, abfss, wasb, wasbs schemes allowed
assertInvalid("https://[email protected]/some/path/file");

// host must have at least two labels
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ protected void setup(Binder binder)
factories.addBinding("abfs").to(AzureFileSystemFactory.class);
factories.addBinding("abfss").to(AzureFileSystemFactory.class);
factories.addBinding("wasb").to(AzureFileSystemFactory.class);
factories.addBinding("wasbs").to(AzureFileSystemFactory.class);
}

if (config.isNativeS3Enabled()) {
Expand Down

0 comments on commit 74bab73

Please sign in to comment.