From 3d929d459ab21efe313f42754b58b4392d547aee Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Fri, 14 Jul 2023 09:26:19 +0200 Subject: [PATCH] Core: Handle optional fields (#8050) * Core: Handle allow optional fields We expect: - current-snapshot-id - properties - snapshots to be there, but they are actually optional. * Use AssertJ --- .../apache/iceberg/TableMetadataParser.java | 38 +++++++--- .../org/apache/iceberg/TestTableMetadata.java | 10 +++ .../TableMetadataV2ValidMinimal.json | 71 +++++++++++++++++++ 3 files changed, 108 insertions(+), 11 deletions(-) create mode 100644 core/src/test/resources/TableMetadataV2ValidMinimal.json diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index 601aff55169d..473f8497cbb4 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -431,15 +431,26 @@ public static TableMetadata fromJson(String metadataLocation, JsonNode node) { defaultSortOrderId = defaultSortOrder.orderId(); } - // parse properties map - Map properties = JsonUtil.getStringMap(PROPERTIES, node); - long currentSnapshotId = JsonUtil.getLong(CURRENT_SNAPSHOT_ID, node); + Map properties; + if (node.has(PROPERTIES)) { + // parse properties map + properties = JsonUtil.getStringMap(PROPERTIES, node); + } else { + properties = ImmutableMap.of(); + } + + Long currentSnapshotId = JsonUtil.getLongOrNull(CURRENT_SNAPSHOT_ID, node); + if (currentSnapshotId == null) { + // This field is optional, but internally we set this to -1 when not set + currentSnapshotId = -1L; + } + long lastUpdatedMillis = JsonUtil.getLong(LAST_UPDATED_MILLIS, node); Map refs; if (node.has(REFS)) { refs = refsFromJson(node.get(REFS)); - } else if (currentSnapshotId != -1) { + } else if (currentSnapshotId != -1L) { // initialize the main branch if there are no refs refs = ImmutableMap.of( @@ -448,14 +459,19 @@ public static TableMetadata fromJson(String metadataLocation, JsonNode node) { refs = ImmutableMap.of(); } - JsonNode snapshotArray = JsonUtil.get(SNAPSHOTS, node); - Preconditions.checkArgument( - snapshotArray.isArray(), "Cannot parse snapshots from non-array: %s", snapshotArray); + List snapshots; + if (node.has(SNAPSHOTS)) { + JsonNode snapshotArray = JsonUtil.get(SNAPSHOTS, node); + Preconditions.checkArgument( + snapshotArray.isArray(), "Cannot parse snapshots from non-array: %s", snapshotArray); - List snapshots = Lists.newArrayListWithExpectedSize(snapshotArray.size()); - Iterator iterator = snapshotArray.elements(); - while (iterator.hasNext()) { - snapshots.add(SnapshotParser.fromJson(iterator.next())); + snapshots = Lists.newArrayListWithExpectedSize(snapshotArray.size()); + Iterator iterator = snapshotArray.elements(); + while (iterator.hasNext()) { + snapshots.add(SnapshotParser.fromJson(iterator.next())); + } + } else { + snapshots = ImmutableList.of(); } List statisticsFiles; diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 879d4e73e210..3e4ff1504263 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -1324,6 +1324,16 @@ public void testParseSchemaIdentifierFields() throws Exception { Assert.assertEquals(Sets.newHashSet(1, 2), parsed.schemasById().get(1).identifierFieldIds()); } + @Test + public void testParseMinimal() throws Exception { + String data = readTableMetadataInputFile("TableMetadataV2ValidMinimal.json"); + TableMetadata parsed = TableMetadataParser.fromJson(data); + Assertions.assertThat(parsed.snapshots()).isEmpty(); + Assertions.assertThat(parsed.snapshotLog()).isEmpty(); + Assertions.assertThat(parsed.properties()).isEmpty(); + Assertions.assertThat(parsed.previousFiles()).isEmpty(); + } + @Test public void testUpdateSchemaIdentifierFields() { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); diff --git a/core/src/test/resources/TableMetadataV2ValidMinimal.json b/core/src/test/resources/TableMetadataV2ValidMinimal.json new file mode 100644 index 000000000000..529b10d1fdf1 --- /dev/null +++ b/core/src/test/resources/TableMetadataV2ValidMinimal.json @@ -0,0 +1,71 @@ +{ + "format-version": 2, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1602638573590, + "last-column-id": 3, + "current-schema-id": 0, + "schemas": [ + { + "type": "struct", + "schema-id": 0, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + } + ], + "default-spec-id": 0, + "partition-specs": [ + { + "spec-id": 0, + "fields": [ + { + "name": "x", + "transform": "identity", + "source-id": 1, + "field-id": 1000 + } + ] + } + ], + "last-partition-id": 1000, + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + { + "transform": "identity", + "source-id": 2, + "direction": "asc", + "null-order": "nulls-first" + }, + { + "transform": "bucket[4]", + "source-id": 3, + "direction": "desc", + "null-order": "nulls-last" + } + ] + } + ] +} \ No newline at end of file