diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index 769bb7bf8a..b7a321a6c2 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -92,24 +92,6 @@ class UpdateMetrics: added_eq_deletes: int removed_eq_deletes: int - # def clear() { - # self.added_file_size = 0 - # self.removed_file_size = 0 - # self.added_data_files = 0 - # self.removed_data_files = 0 - # self.added_eq_delete_files = 0 - # self.removed_eq_delete_files = 0 - # self.added_pos_delete_files = 0 - # self.removed_pos_delete_files = 0 - # self.added_delete_files = 0 - # self.removed_delete_files = 0 - # self.added_records = 0 - # self.deleted_records = 0 - # self.added_pos_deletes = 0 - # self.removed_pos_deletes = 0 - # self.added_eq_deletes = 0 - # self.removed_eq_deletes = 0 - # } def __init__(self) -> None: self.added_file_size = 0 self.removed_file_size = 0 @@ -303,7 +285,7 @@ def remove_file(self, data_file: DataFile, partition_spec: Optional[PartitionSpe self.metrics.remove_file(data_file) if getattr(data_file, "partition", None) is not None and len(data_file.partition.record_fields()) != 0: if partition_spec is None or schema is None: - raise ValueError("add data file with partition but without specifying the partiton_spec and schema") + raise ValueError("remove data file with partition but without specifying the partiton_spec and schema") self.update_partition_metrics(partition_spec=partition_spec, file=data_file, is_add_file=False, schema=schema) def update_partition_metrics(self, partition_spec: PartitionSpec, file: DataFile, is_add_file: bool, schema: Schema) -> None: diff --git a/tests/table/test_snapshots.py b/tests/table/test_snapshots.py index 35ac3e280b..7c4fe11841 100644 --- a/tests/table/test_snapshots.py +++ b/tests/table/test_snapshots.py @@ -156,11 +156,6 @@ def data_file() -> DataFile: ) -@pytest.fixture -def data_file_with_partition() -> DataFile: - return DataFile(content=DataFileContent.DATA, record_count=100, file_size_in_bytes=1234, partition=Record(int_field=1)) - - def test_snapshot_summary_collector(data_file: DataFile) -> None: ssc = SnapshotSummaryCollector() @@ -175,7 +170,9 @@ def test_snapshot_summary_collector(data_file: DataFile) -> None: } -def test_snapshot_summary_collector_with_partition(data_file_with_partition: DataFile) -> None: +def test_snapshot_summary_collector_with_partition() -> None: + # Given + ssc = SnapshotSummaryCollector() assert ssc.build() == {} @@ -185,19 +182,38 @@ def test_snapshot_summary_collector_with_partition(data_file_with_partition: Dat NestedField(field_id=3, name="int_field", field_type=IntegerType(), required=False), ) spec = PartitionSpec(PartitionField(source_id=3, field_id=1001, transform=IdentityTransform(), name='int_field')) + data_file_1 = DataFile(content=DataFileContent.DATA, record_count=100, file_size_in_bytes=1234, partition=Record(int_field=1)) + data_file_2 = DataFile(content=DataFileContent.DATA, record_count=200, file_size_in_bytes=4321, partition=Record(int_field=2)) + # When + ssc.add_file(data_file=data_file_1, schema=schema, partition_spec=spec) + ssc.remove_file(data_file=data_file_1, schema=schema, partition_spec=spec) + ssc.remove_file(data_file=data_file_2, schema=schema, partition_spec=spec) + + # Then + assert ssc.build() == { + 'added-files-size': '1234', + 'removed-files-size': '5555', + 'added-data-files': '1', + 'deleted-data-files': '2', + 'added-records': '100', + 'deleted-records': '300', + 'changed-partition-count': '2', + } + + # When ssc.set_partition_summary_limit(10) - ssc.add_file(data_file=data_file_with_partition, schema=schema, partition_spec=spec) - ssc.remove_file(data_file=data_file_with_partition, schema=schema, partition_spec=spec) + # Then assert ssc.build() == { 'added-files-size': '1234', - 'removed-files-size': '1234', + 'removed-files-size': '5555', 'added-data-files': '1', - 'deleted-data-files': '1', + 'deleted-data-files': '2', 'added-records': '100', - 'deleted-records': '100', - 'changed-partition-count': '1', + 'deleted-records': '300', + 'changed-partition-count': '2', 'partitions.int_field=1': 'added-files-size=1234,removed-files-size=1234,added-data-files=1,deleted-data-files=1,added-records=100,deleted-records=100', + 'partitions.int_field=2': 'removed-files-size=4321,deleted-data-files=1,deleted-records=200', }