Skip to content

Commit

Permalink
HIVE-24838. Reduce FS creation in Warehouse::getDnsPath for object stores (amagyar)
Browse files Browse the repository at this point in the history
  • Loading branch information
zeroflag committed Mar 25, 2021
1 parent 6e8936f commit 0056b69
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql;

import static org.junit.Assert.assertEquals;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.junit.Before;
import org.junit.Test;

/**
 * Checks the string form of the path returned by
 * {@link Warehouse#getDnsPath(Path, Configuration)} for several combinations of
 * fs.defaultFS and input path: scheme without authority, scheme with authority,
 * and no scheme at all.
 */
public class TestWarehouseDnsPath {
  /** Configuration key that the tests change before each lookup. */
  private static final String DEFAULT_FS_KEY = "fs.defaultFS";

  Configuration conf = new Configuration();

  @Before
  public void setUp() throws Exception {
    conf.set(DEFAULT_FS_KEY, "hdfs://localhost");
  }

  /** A scheme-only path is expected to pick up the authority of the default file system. */
  @Test
  public void testDnsPathNullAuthority() throws Exception {
    expectDnsPath("hdfs://localhost", "hdfs:///path/1", "hdfs://localhost/path/1");
    expectDnsPath("s3://bucket", "s3:///path/1", "s3://bucket/path/1");
  }

  /** A path that already has scheme and authority is expected to come back unchanged. */
  @Test
  public void testDnsPathWithAuthority() throws Exception {
    expectDnsPath("hdfs://localhost", "hdfs://127.0.0.1/path/1", "hdfs://127.0.0.1/path/1");
    expectDnsPath("s3a://bucket", "s3a://bucket/path/1", "s3a://bucket/path/1");
  }

  /** A bare path is expected to pick up both scheme and authority of the default file system. */
  @Test
  public void testDnsPathWithNoScheme() throws Exception {
    expectDnsPath("hdfs://localhost", "/path/1", "hdfs://localhost/path/1");
    expectDnsPath("s3n://bucket", "/path/1", "s3n://bucket/path/1");
  }

  /**
   * Sets fs.defaultFS on the shared configuration, resolves {@code input} through
   * {@link Warehouse#getDnsPath(Path, Configuration)} and compares the result's
   * string form with {@code expected}.
   *
   * @param defaultFs value to store under fs.defaultFS before resolving
   * @param input path string handed to getDnsPath
   * @param expected expected string form of the returned path
   * @throws MetaException if getDnsPath fails
   */
  private void expectDnsPath(String defaultFs, String input, String expected)
      throws MetaException {
    conf.set(DEFAULT_FS_KEY, defaultFs);
    Path resolved = Warehouse.getDnsPath(new Path(input), conf);
    assertEquals(expected, resolved.toString());
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.LinkedHashMap;
Expand Down Expand Up @@ -135,17 +136,50 @@ public FileSystem getFs(Path f) throws MetaException {
* This routine solves this problem by replacing the scheme and authority of a
* path with the scheme and authority of the FileSystem that it maps to.
*
* Since creating a new file system object is expensive, for blob storage
* schemes this method mimics getFileSystem() without creating an actual
* FileSystem object: when the input path lacks a scheme or an authority,
* the missing part is taken from the default URI. For all other schemes
* a FileSystem instance is still created.
*
* @param path
* Path to be canonicalized
* @return Path with canonical scheme and authority
*/
// NOTE(review): this method is shown as a rendered diff hunk without +/- markers, so lines that
// look like they were removed by the commit are interleaved with the added ones. The suspected
// remnants are flagged below; confirm against the real Warehouse.java before relying on this text.
public static Path getDnsPath(Path path, Configuration conf) throws MetaException {
// NOTE(review): the next four lines look like the pre-commit body (this `if` is never closed and
// `fs` / `uriPath` are declared again further down) - presumably removed lines.
FileSystem fs = getFs(path, conf);
String uriPath = path.toUri().getPath();
if (StringUtils.isEmpty(uriPath)) {
uriPath = "/";
// Blob storage branch: the result is built from the path's URI and the default URI only;
// getFs() is not called here. A null scheme is judged by the default URI's scheme
// (see isBlobStorageScheme).
if (isBlobStorageScheme(conf, path.toUri().getScheme())) {
String scheme = path.toUri().getScheme();
String authority = path.toUri().getAuthority();
URI defaultUri = FileSystem.getDefaultUri(conf);
// Rewrite only when the path has neither scheme nor authority, or when its scheme equals the
// default URI's scheme (ignoring case).
if ((authority == null && scheme == null)
|| StringUtils.equalsIgnoreCase(scheme, defaultUri.getScheme())) {
// Fill in whichever part is missing from the default URI; parts already present are kept.
if (authority == null) {
authority = defaultUri.getAuthority();
}
if (scheme == null) {
scheme = defaultUri.getScheme();
}
String uriPath = path.toUri().getPath();
// An empty path component is replaced with the root path.
if (StringUtils.isEmpty(uriPath)) {
uriPath = "/";
}
return new Path(scheme, authority, uriPath);
}
// Any other combination (e.g. a scheme that differs from the default one) is returned unchanged.
return path;
} else { // fallback: for other FS type make the FS instance
FileSystem fs = getFs(path, conf);
String uriPath = path.toUri().getPath();
// An empty path component is replaced with the root path.
if (StringUtils.isEmpty(uriPath)) {
uriPath = "/";
}
// Scheme and authority come from the URI of the FileSystem returned by getFs().
return (new Path(fs.getUri().getScheme(), fs.getUri().getAuthority(), uriPath));
}
// NOTE(review): this return duplicates the one in the else branch and follows an if/else in
// which every path has already returned - presumably the removed pre-commit return.
return (new Path(fs.getUri().getScheme(), fs.getUri().getAuthority(), uriPath));
}

/**
 * Tells whether a scheme is one of the configured blob storage schemes.
 *
 * @param conf configuration that supplies the scheme list and, when needed, the default URI
 * @param scheme scheme to test; when null, the scheme of the default file system URI is
 *        tested instead
 * @return true if the effective scheme equals, ignoring case, an entry of
 *         HIVE_BLOBSTORE_SUPPORTED_SCHEMES
 */
private static boolean isBlobStorageScheme(Configuration conf, String scheme) {
  String effectiveScheme = scheme;
  if (effectiveScheme == null) {
    // No scheme on the path: fall back to the scheme of the default file system URI.
    effectiveScheme = FileSystem.getDefaultUri(conf).getScheme();
  }
  for (String supported : MetastoreConf.getStringCollection(
      conf, MetastoreConf.ConfVars.HIVE_BLOBSTORE_SUPPORTED_SCHEMES)) {
    if (supported.equalsIgnoreCase(effectiveScheme)) {
      return true;
    }
  }
  return false;
}

public Path getDnsPath(Path path) throws MetaException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,8 @@ public enum ConfVars {
"hive.metastore.custom.database.product.classname", "none",
"Hook for external RDBMS. This class will be instantiated only when " +
"metastore.use.custom.database.product is set to true."),
HIVE_BLOBSTORE_SUPPORTED_SCHEMES("hive.blobstore.supported.schemes", "hive.blobstore.supported.schemes", "s3,s3a,s3n",
"Comma-separated list of supported blobstore schemes."),

// Deprecated Hive values that we are keeping for backwards compatibility.
@Deprecated
Expand Down

0 comments on commit 0056b69

Please sign in to comment.