From d350c9b8c995a2953aa8b80a0a1fc7cadc4dd16a Mon Sep 17 00:00:00 2001 From: Xinbin Huang Date: Fri, 9 Dec 2022 14:51:03 -0800 Subject: [PATCH] API: IsPartitioned to return true for V1 Tables with all void transforms (#3059) --- api/src/main/java/org/apache/iceberg/PartitionSpec.java | 2 +- .../java/org/apache/iceberg/transforms/Transform.java | 9 +++++++++ .../org/apache/iceberg/transforms/VoidTransform.java | 5 +++++ core/src/main/java/org/apache/iceberg/DataFiles.java | 8 ++++++-- core/src/main/java/org/apache/iceberg/FileMetadata.java | 6 ++++-- .../java/org/apache/iceberg/TestPartitionSpecInfo.java | 9 +++++++++ .../test/java/org/apache/iceberg/TestPartitioning.java | 5 +++++ 7 files changed, 39 insertions(+), 5 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/PartitionSpec.java b/api/src/main/java/org/apache/iceberg/PartitionSpec.java index aa6a6051e561..a31cfd76583b 100644 --- a/api/src/main/java/org/apache/iceberg/PartitionSpec.java +++ b/api/src/main/java/org/apache/iceberg/PartitionSpec.java @@ -87,7 +87,7 @@ public List fields() { } public boolean isPartitioned() { - return fields.length > 0; + return fields.length > 0 && fields().stream().anyMatch(f -> !f.transform().isVoid()); } public boolean isUnpartitioned() { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transform.java b/api/src/main/java/org/apache/iceberg/transforms/Transform.java index 6905eddc6596..5a56b672b1b1 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transform.java @@ -136,6 +136,15 @@ default boolean isIdentity() { return false; } + /** + * Return whether this transform is the void transform. + * + * @return true if this is a void transform, false otherwise + */ + default boolean isVoid() { + return false; + } + /** * Returns a human-readable String representation of a transformed value. * diff --git a/api/src/main/java/org/apache/iceberg/transforms/VoidTransform.java b/api/src/main/java/org/apache/iceberg/transforms/VoidTransform.java index cbab2879ffe8..5e8e7494c4b5 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/VoidTransform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/VoidTransform.java @@ -79,6 +79,11 @@ public UnboundPredicate project(String name, BoundPredicate predicate) return null; } + @Override + public boolean isVoid() { + return true; + } + @Override public String toHumanString(Void value) { return "null"; diff --git a/core/src/main/java/org/apache/iceberg/DataFiles.java b/core/src/main/java/org/apache/iceberg/DataFiles.java index 9a23a09ec231..9b3c5bc67621 100644 --- a/core/src/main/java/org/apache/iceberg/DataFiles.java +++ b/core/src/main/java/org/apache/iceberg/DataFiles.java @@ -41,6 +41,8 @@ static PartitionData newPartitionData(PartitionSpec spec) { static PartitionData copyPartitionData( PartitionSpec spec, StructLike partitionData, PartitionData reuse) { + Preconditions.checkArgument( + spec.isPartitioned(), "Can't copy partition data to a unpartitioned table"); PartitionData data = reuse; if (data == null) { data = newPartitionData(spec); @@ -136,7 +138,7 @@ public static class Builder { public Builder(PartitionSpec spec) { this.spec = spec; this.specId = spec.specId(); - this.isPartitioned = spec.fields().size() > 0; + this.isPartitioned = spec.isPartitioned(); this.partitionData = isPartitioned ? newPartitionData(spec) : null; } @@ -219,7 +221,9 @@ public Builder withFormat(FileFormat newFormat) { } public Builder withPartition(StructLike newPartition) { - this.partitionData = copyPartitionData(spec, newPartition, partitionData); + if (isPartitioned) { + this.partitionData = copyPartitionData(spec, newPartition, partitionData); + } return this; } diff --git a/core/src/main/java/org/apache/iceberg/FileMetadata.java b/core/src/main/java/org/apache/iceberg/FileMetadata.java index 7d025fa8a630..fa286680b660 100644 --- a/core/src/main/java/org/apache/iceberg/FileMetadata.java +++ b/core/src/main/java/org/apache/iceberg/FileMetadata.java @@ -60,7 +60,7 @@ public static class Builder { Builder(PartitionSpec spec) { this.spec = spec; this.specId = spec.specId(); - this.isPartitioned = spec.fields().size() > 0; + this.isPartitioned = spec.isPartitioned(); this.partitionData = isPartitioned ? DataFiles.newPartitionData(spec) : null; } @@ -154,7 +154,9 @@ public Builder withFormat(FileFormat newFormat) { } public Builder withPartition(StructLike newPartition) { - this.partitionData = DataFiles.copyPartitionData(spec, newPartition, partitionData); + if (isPartitioned) { + this.partitionData = DataFiles.copyPartitionData(spec, newPartition, partitionData); + } return this; } diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java b/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java index 73828fa33ae5..46f38b97f2a4 100644 --- a/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java +++ b/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java @@ -63,11 +63,20 @@ public void cleanupTables() { TestTables.clearTables(); } + @Test + public void testSpecIsUnpartitionedForVoidTranforms() { + PartitionSpec spec = + PartitionSpec.builderFor(schema).alwaysNull("id").alwaysNull("data").build(); + + Assert.assertTrue(spec.isUnpartitioned()); + } + @Test public void testSpecInfoUnpartitionedTable() { PartitionSpec spec = PartitionSpec.unpartitioned(); TestTables.TestTable table = TestTables.create(tableDir, "test", schema, spec, formatVersion); + Assert.assertTrue(spec.isUnpartitioned()); Assert.assertEquals(spec, table.spec()); Assert.assertEquals(spec.lastAssignedFieldId(), table.spec().lastAssignedFieldId()); Assert.assertEquals(ImmutableMap.of(spec.specId(), spec), table.specs()); diff --git a/core/src/test/java/org/apache/iceberg/TestPartitioning.java b/core/src/test/java/org/apache/iceberg/TestPartitioning.java index 5dfc9ac40ee9..46c9cb1f6139 100644 --- a/core/src/test/java/org/apache/iceberg/TestPartitioning.java +++ b/core/src/test/java/org/apache/iceberg/TestPartitioning.java @@ -78,6 +78,11 @@ public void testPartitionTypeWithSpecEvolutionInV1Tables() { NestedField.optional(1001, "category_bucket_8", Types.IntegerType.get())); StructType actualType = Partitioning.partitionType(table); Assert.assertEquals("Types must match", expectedType, actualType); + + table.updateSpec().removeField("data").removeField("category_bucket_8").commit(); + + Assert.assertEquals("Should have 3 specs", 3, table.specs().size()); + Assert.assertTrue("PartitionSpec should be unpartitioned", table.spec().isUnpartitioned()); } @Test