From 5fda039a57f5e106d3c0990f93da389a1f4c30cd Mon Sep 17 00:00:00 2001 From: Marius Grama Date: Mon, 18 Mar 2024 22:45:15 +0100 Subject: [PATCH] Delete also blobs ending in slash when deleting directory contents AWS S3 allows creating "folder" blobs with the media type "application/x-directory". These blobs should be deleted as well along with the normal blobs when deleting the contents of a directory in order to ensure that the directory corresponding to a table is actually fully deleted. --- .../io/trino/filesystem/s3/S3FileSystem.java | 26 ++++-- .../s3/AbstractTestS3FileSystem.java | 91 +++++++++++++++---- .../AbstractTestTrinoFileSystem.java | 2 +- 3 files changed, 93 insertions(+), 26 deletions(-) diff --git a/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3FileSystem.java b/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3FileSystem.java index 8228c6d45c58f..74a4b7aab00cc 100644 --- a/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3FileSystem.java +++ b/lib/trino-filesystem-s3/src/main/java/io/trino/filesystem/s3/S3FileSystem.java @@ -36,12 +36,12 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Optional; import java.util.Set; +import java.util.stream.Stream; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.collect.Iterables.partition; @@ -105,13 +105,13 @@ public void deleteFile(Location location) public void deleteDirectory(Location location) throws IOException { - FileIterator iterator = listFiles(location); + FileIterator iterator = listObjects(location, true); while (iterator.hasNext()) { List files = new ArrayList<>(); while ((files.size() < 1000) && iterator.hasNext()) { files.add(iterator.next().location()); } - deleteFiles(files); + deleteObjects(files); } } @@ -120,7 +120,12 @@ public void deleteFiles(Collection locations) throws IOException { locations.forEach(Location::verifyValidFileLocation); + deleteObjects(locations); + } + private void deleteObjects(Collection locations) + throws IOException + { SetMultimap bucketToKeys = locations.stream() .map(S3Location::new) .collect(toMultimap(S3Location::bucket, S3Location::key, HashMultimap::create)); @@ -170,6 +175,12 @@ public void renameFile(Location source, Location target) @Override public FileIterator listFiles(Location location) throws IOException + { + return listObjects(location, false); + } + + private FileIterator listObjects(Location location, boolean includeDirectoryObjects) + throws IOException { S3Location s3Location = new S3Location(location); @@ -185,10 +196,11 @@ public FileIterator listFiles(Location location) .build(); try { - Iterator iterator = client.listObjectsV2Paginator(request).contents().stream() - .filter(object -> !object.key().endsWith("/")) - .iterator(); - return new S3FileIterator(s3Location, iterator); + Stream s3ObjectStream = client.listObjectsV2Paginator(request).contents().stream(); + if (!includeDirectoryObjects) { + s3ObjectStream = s3ObjectStream.filter(object -> !object.key().endsWith("/")); + } + return new S3FileIterator(s3Location, s3ObjectStream.iterator()); } catch (SdkException e) { throw new IOException("Failed to list location: " + location, e); diff --git a/lib/trino-filesystem-s3/src/test/java/io/trino/filesystem/s3/AbstractTestS3FileSystem.java b/lib/trino-filesystem-s3/src/test/java/io/trino/filesystem/s3/AbstractTestS3FileSystem.java index d6be2d0501880..bef484bb685d3 100644 --- a/lib/trino-filesystem-s3/src/test/java/io/trino/filesystem/s3/AbstractTestS3FileSystem.java +++ b/lib/trino-filesystem-s3/src/test/java/io/trino/filesystem/s3/AbstractTestS3FileSystem.java @@ -15,6 +15,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.io.ByteStreams; +import com.google.common.io.Closer; import io.airlift.log.Logging; import io.trino.filesystem.AbstractTestTrinoFileSystem; import io.trino.filesystem.FileEntry; @@ -31,11 +32,13 @@ import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import java.io.Closeable; import java.io.IOException; import java.util.List; import static com.google.common.collect.Iterables.getOnlyElement; import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; public abstract class AbstractTestS3FileSystem @@ -167,29 +170,81 @@ void testFileWithTrailingWhitespaceAgainstNativeClient() } @Test - void testExistingFileWithTrailingSlash() + void testExistingDirectoryWithTrailingSlash() throws IOException { - try (S3Client s3Client = createS3Client()) { - String key = "data/file/"; - s3Client.putObject(request -> request.bucket(bucket()).key(key), RequestBody.empty()); - try { - assertThat(fileSystem.listFiles(getRootLocation()).hasNext()).isFalse(); + try (S3Client s3Client = createS3Client(); Closer closer = Closer.create()) { + String key = "data/dir/"; + createDirectory(closer, s3Client, key); + assertThat(fileSystem.listFiles(getRootLocation()).hasNext()).isFalse(); - Location data = getRootLocation().appendPath("data/"); - assertThat(fileSystem.listDirectories(getRootLocation())).containsExactly(data); - assertThat(fileSystem.listDirectories(data)).containsExactly(data.appendPath("file/")); + Location data = getRootLocation().appendPath("data/"); + assertThat(fileSystem.listDirectories(getRootLocation())).containsExactly(data); + assertThat(fileSystem.listDirectories(data)).containsExactly(data.appendPath("dir/")); - // blobs ending in slash are invisible to S3FileSystem and will not be deleted - fileSystem.deleteDirectory(data); - assertThat(fileSystem.listDirectories(getRootLocation())).containsExactly(data); + fileSystem.deleteDirectory(data); + assertThat(fileSystem.listDirectories(getRootLocation())).isEmpty(); - fileSystem.deleteDirectory(getRootLocation()); - assertThat(fileSystem.listDirectories(getRootLocation())).containsExactly(data); - } - finally { - s3Client.deleteObject(delete -> delete.bucket(bucket()).key(key)); - } + fileSystem.deleteDirectory(getRootLocation()); + assertThat(fileSystem.listDirectories(getRootLocation())).isEmpty(); + } + } + + @Test + void testDeleteEmptyDirectoryWithDeepHierarchy() + throws IOException + { + try (S3Client s3Client = createS3Client(); Closer closer = Closer.create()) { + createDirectory(closer, s3Client, "deep/dir"); + createBlob(closer, "deep/dir/file1.txt"); + createBlob(closer, "deep/dir/file2.txt"); + createBlob(closer, "deep/dir/file3.txt"); + createDirectory(closer, s3Client, "deep/dir/dir4"); + createBlob(closer, "deep/dir/dir4/file5.txt"); + + assertThat(fileSystem.listFiles(getRootLocation()).hasNext()).isTrue(); + + Location directory = getRootLocation().appendPath("deep/dir/"); + assertThat(fileSystem.listDirectories(getRootLocation().appendPath("deep"))).containsExactly(directory); + assertThat(fileSystem.listDirectories(directory)).containsExactly(getRootLocation().appendPath("deep/dir/dir4/")); + + fileSystem.deleteDirectory(directory); + assertThat(fileSystem.listDirectories(getRootLocation().appendPath("deep"))).isEmpty(); + assertThat(fileSystem.listDirectories(getRootLocation())).isEmpty(); + assertThat(fileSystem.listFiles(getRootLocation()).hasNext()).isFalse(); + } + } + + protected Location createDirectory(Closer closer, S3Client s3Client, String path) + { + Location location = createLocation(path); + closer.register(new TempDirectory(s3Client, path)).create(); + return location; + } + + protected class TempDirectory + implements Closeable + { + private final S3Client s3Client; + private final String path; + + public TempDirectory(S3Client s3Client, String path) + { + this.s3Client = requireNonNull(s3Client, "s3Client is null"); + String key = requireNonNull(path, "path is null"); + this.path = key.endsWith("/") ? key : key + "/"; + } + + public void create() + { + s3Client.putObject(request -> request.bucket(bucket()).key(path), RequestBody.empty()); + } + + @Override + public void close() + throws IOException + { + s3Client.deleteObject(delete -> delete.bucket(bucket()).key(path)); } } } diff --git a/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java b/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java index e3ca87854b62a..4c668b3935cd5 100644 --- a/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java +++ b/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java @@ -1310,7 +1310,7 @@ private String readLocation(Location path) } } - private Location createBlob(Closer closer, String path) + protected Location createBlob(Closer closer, String path) { Location location = createLocation(path); closer.register(new TempBlob(location)).createOrOverwrite(TEST_BLOB_CONTENT_PREFIX + location.toString());