Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Core, Spark 3.4: Write properties of PositionDeletesTable should respect ones of BaseTable #8428

Merged
merged 6 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ public Map<Integer, SortOrder> sortOrders() {

@Override
public Map<String, String> properties() {
return ImmutableMap.of();
return table().properties();
}

@Override
Expand Down
12 changes: 12 additions & 0 deletions core/src/main/java/org/apache/iceberg/PositionDeletesTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.ManifestEvaluator;
Expand Down Expand Up @@ -93,6 +95,16 @@ public Map<Integer, PartitionSpec> specs() {
return specs;
}

@Override
public Map<String, String> properties() {
// PositionDeletesRewriteAction needs the write properties, so expose the
// base table's "write."-prefixed properties instead of an empty map.
return Collections.unmodifiableMap(
table().properties().entrySet().stream()
.filter(entry -> entry.getKey().startsWith("write."))
Copy link
Contributor Author

@jerqi jerqi Sep 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found that all of the write properties are needed by our PositionDeletesRewriteAction, so I chose to match on the key prefix here instead of copying specific entries.

.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
}

private Schema calculateSchema() {
Types.StructType partitionType = Partitioning.partitionType(table());
Schema result =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,20 +182,18 @@ public void testWriteDataWithDifferentSetting() throws Exception {
.isEqualToIgnoringCase(properties.get(COMPRESSION_CODEC));
}

if (PARQUET.equals(format)) {
SparkActions.get(spark)
.rewritePositionDeletes(table)
.option(SizeBasedFileRewriter.REWRITE_ALL, "true")
.execute();
table.refresh();
deleteManifestFiles = table.currentSnapshot().deleteManifests(table.io());
try (ManifestReader<DeleteFile> reader =
ManifestFiles.readDeleteManifest(deleteManifestFiles.get(0), table.io(), specMap)) {
DeleteFile file = reader.iterator().next();
InputFile inputFile = table.io().newInputFile(file.path().toString());
Assertions.assertThat(getCompressionType(inputFile))
.isEqualToIgnoringCase(properties.get(COMPRESSION_CODEC));
}
SparkActions.get(spark)
.rewritePositionDeletes(table)
.option(SizeBasedFileRewriter.REWRITE_ALL, "true")
.execute();
table.refresh();
deleteManifestFiles = table.currentSnapshot().deleteManifests(table.io());
try (ManifestReader<DeleteFile> reader =
ManifestFiles.readDeleteManifest(deleteManifestFiles.get(0), table.io(), specMap)) {
DeleteFile file = reader.iterator().next();
InputFile inputFile = table.io().newInputFile(file.path().toString());
Assertions.assertThat(getCompressionType(inputFile))
.isEqualToIgnoringCase(properties.get(COMPRESSION_CODEC));
}
}

Expand Down
Loading