Skip to content

Commit

Permalink
Fixes neo4j-contrib#2724: Broken handling of AWS S3 urls (neo4j-contr…
Browse files Browse the repository at this point in the history
…ib#2725)

* Fixes neo4j-contrib#2724, AWS S3 url handling

* Fixes neo4j-contrib#2269: apoc.load procedures don't work anymore with urls containing %

* Code formatting

* Adds fix for the getHost problem

Co-authored-by: Giuseppe Villani <[email protected]>
Co-authored-by: Nacho Cordón <[email protected]>
  • Loading branch information
3 people committed May 17, 2022
1 parent d8aa7be commit 12fd21b
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 20 deletions.
7 changes: 0 additions & 7 deletions core/src/main/java/apoc/util/FileUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ public StreamConnection getStreamConnection(String urlAddress, Map<String, Objec
return FileUtils.openS3InputStream(urlAddress);
case hdfs:
return FileUtils.openHdfsInputStream(urlAddress);
case ftp:
case http:
case https:
case gs:
Expand Down Expand Up @@ -183,12 +182,6 @@ public static CountingInputStream inputStreamFor(Object input, Map<String, Objec
}
}

public static CountingReader readFile(String fileName) throws IOException, FileNotFoundException {
File file = new File(fileName);
if (!file.exists() || !file.isFile() || !file.canRead()) throw new IOException("Cannot open file "+fileName+" for reading.");
return new CountingReader(file);
}

public static String changeFileUrlIfImportDirectoryConstrained(String url) throws IOException {
if (isFile(url) && isImportUsingNeo4jConfig()) {
if (!apocConfig().getBoolean(APOC_IMPORT_FILE_ALLOW__READ__FROM__FILESYSTEM)) {
Expand Down
39 changes: 26 additions & 13 deletions core/src/main/java/apoc/util/s3/S3ParamsExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import apoc.util.Util;
import com.amazonaws.regions.Regions;

import java.net.URI;
import java.net.URL;
import java.util.Map;
import java.util.Objects;
Expand All @@ -14,19 +15,25 @@ public class S3ParamsExtractor {
private static final String SECRET_KEY = "secretKey";
private static final String SESSION_TOKEN = "sessionToken";

public static S3Params extract(URL url) throws IllegalArgumentException {
public static S3Params extract(URL url) throws IllegalArgumentException {
return extract(url.toString());
}

public static S3Params extract(String url) throws IllegalArgumentException {

URI uri = URI.create(url);

if (!PROTOCOL.equals(url.getProtocol())) {
throw new IllegalArgumentException("Unsupported protocol '" + url.getProtocol() + "'");
if (!PROTOCOL.equals(uri.getScheme())) {
throw new IllegalArgumentException("Unsupported protocol '" + uri.getScheme() + "'");
}

//aws credentials
String accessKey = null;
String secretKey = null;
String sessionToken = null;

if (url.getUserInfo() != null) {
String[] credentials = url.getUserInfo().split(":");
if (uri.getUserInfo() != null) {
String[] credentials = uri.getUserInfo().split(":");
if (credentials.length > 1) {
accessKey = credentials[0];
secretKey = credentials[1];
Expand All @@ -36,26 +43,32 @@ public static S3Params extract(URL url) throws IllegalArgumentException {
}
// User info part cannot contain session token.
} else {
Map<String, String> params = Util.getRequestParameter(url.getQuery());
Map<String, String> params = Util.getRequestParameter(uri.getQuery());
if(Objects.nonNull(params)) {
if(params.containsKey(ACCESS_KEY)){accessKey = params.get(ACCESS_KEY);}
if(params.containsKey(SECRET_KEY)){secretKey = params.get(SECRET_KEY);}
if(params.containsKey(SESSION_TOKEN)){sessionToken = params.get(SESSION_TOKEN);}
}
}

// endpoint
String endpoint = url.getHost();
// We have to use the getAuthority here instead of getHost, because addresses
// like us-east-1.127.0.0.1:55220 would return null for the later one.
// The downside is we have to clean the credentials preceding the @ if they are there,
// which .getHost would not return
String endpoint = uri.getAuthority();
int atIndex = endpoint.indexOf( "@" );
if (atIndex != -1)
endpoint = endpoint.substring( atIndex + 1 );

Integer slashIndex = url.getPath().lastIndexOf("/");
Integer slashIndex = uri.getPath().indexOf("/", 1);
String key;
String bucket ;

if(slashIndex > 0){
// key
key = url.getPath().substring(slashIndex + 1);
key = uri.getPath().substring(slashIndex + 1);
// bucket
bucket = url.getPath().substring(1, slashIndex);
bucket = uri.getPath().substring(1, slashIndex);
}
else{
throw new IllegalArgumentException("Invalid url. Must be:\n's3://accessKey:secretKey@endpoint:port/bucket/key' or\n's3://endpoint:port/bucket/key?accessKey=accessKey&secretKey=secretKey'");
Expand Down Expand Up @@ -87,8 +100,8 @@ public static S3Params extract(URL url) throws IllegalArgumentException {
}
}

if (url.getPort() != 80 && url.getPort() != 443 && url.getPort() > 0) {
endpoint += ":" + url.getPort();
if (endpoint != null) {
endpoint = endpoint.replaceAll( ":443", "").replaceAll( ":80", "" );
}

if (Objects.nonNull(endpoint) && endpoint.isEmpty()) {
Expand Down
42 changes: 42 additions & 0 deletions core/src/test/java/apoc/util/s3/S3ParamsExtractorTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package apoc.util.s3;

import org.junit.Test;
import static org.junit.Assert.*;

public class S3ParamsExtractorTest {

@Test
public void testEncodedS3Url() throws Exception {
S3Params params = S3ParamsExtractor.extract("s3://accessKeyId:some%2Fsecret%2Fkey:some%2Fsession%[email protected]:1234/bucket/path/to/key");
assertEquals("some/secret/key", params.getSecretKey());
assertEquals("some/session/token", params.getSessionToken());
assertEquals("accessKeyId", params.getAccessKey());
assertEquals("bucket", params.getBucket());
assertEquals("path/to/key", params.getKey());
assertEquals("s3.us-east-2.amazonaws.com:1234", params.getEndpoint());
assertEquals("us-east-2", params.getRegion());
}

@Test
public void testEncodedS3UrlQueryParams() throws Exception {
S3Params params = S3ParamsExtractor.extract("s3://s3.us-east-2.amazonaws.com:1234/bucket/path/to/key?accessKey=accessKeyId&secretKey=some%2Fsecret%2Fkey&sessionToken=some%2Fsession%2Ftoken");
assertEquals("some/secret/key", params.getSecretKey());
assertEquals("some/session/token", params.getSessionToken());
assertEquals("accessKeyId", params.getAccessKey());
assertEquals("bucket", params.getBucket());
assertEquals("path/to/key", params.getKey());
assertEquals("s3.us-east-2.amazonaws.com:1234", params.getEndpoint());
}

@Test
public void testExtractEndpointPort() throws Exception {
assertEquals("s3.amazonaws.com", S3ParamsExtractor.extract("s3://s3.amazonaws.com:80/bucket/path/to/key").getEndpoint());
assertEquals("s3.amazonaws.com:1234", S3ParamsExtractor.extract("s3://s3.amazonaws.com:1234/bucket/path/to/key").getEndpoint());
}

@Test
public void testExtractRegion() throws Exception {
assertEquals("us-east-2", S3ParamsExtractor.extract("s3://s3.us-east-2.amazonaws.com:80/bucket/path/to/key").getRegion());
assertNull(S3ParamsExtractor.extract("s3://s3.amazonaws.com:80/bucket/path/to/key").getRegion());
}
}

0 comments on commit 12fd21b

Please sign in to comment.