Skip to content

Commit

Permalink
enhance unit test
Browse files Browse the repository at this point in the history
  • Loading branch information
jqin61 committed Mar 12, 2024
1 parent dafb0a4 commit cbf1afd
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 31 deletions.
20 changes: 1 addition & 19 deletions pyiceberg/table/snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,24 +92,6 @@ class UpdateMetrics:
added_eq_deletes: int
removed_eq_deletes: int

# def clear() {
# self.added_file_size = 0
# self.removed_file_size = 0
# self.added_data_files = 0
# self.removed_data_files = 0
# self.added_eq_delete_files = 0
# self.removed_eq_delete_files = 0
# self.added_pos_delete_files = 0
# self.removed_pos_delete_files = 0
# self.added_delete_files = 0
# self.removed_delete_files = 0
# self.added_records = 0
# self.deleted_records = 0
# self.added_pos_deletes = 0
# self.removed_pos_deletes = 0
# self.added_eq_deletes = 0
# self.removed_eq_deletes = 0
# }
def __init__(self) -> None:
self.added_file_size = 0
self.removed_file_size = 0
Expand Down Expand Up @@ -303,7 +285,7 @@ def remove_file(self, data_file: DataFile, partition_spec: Optional[PartitionSpe
self.metrics.remove_file(data_file)
if getattr(data_file, "partition", None) is not None and len(data_file.partition.record_fields()) != 0:
if partition_spec is None or schema is None:
raise ValueError("add data file with partition but without specifying the partiton_spec and schema")
raise ValueError("remove data file with partition but without specifying the partiton_spec and schema")
self.update_partition_metrics(partition_spec=partition_spec, file=data_file, is_add_file=False, schema=schema)

def update_partition_metrics(self, partition_spec: PartitionSpec, file: DataFile, is_add_file: bool, schema: Schema) -> None:
Expand Down
40 changes: 28 additions & 12 deletions tests/table/test_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,6 @@ def data_file() -> DataFile:
)


@pytest.fixture
def data_file_with_partition() -> DataFile:
    """Provide a DATA-content ``DataFile`` whose partition record is ``int_field=1``.

    The file is described as holding 100 records in 1234 bytes.
    """
    partition_record = Record(int_field=1)
    return DataFile(
        content=DataFileContent.DATA,
        record_count=100,
        file_size_in_bytes=1234,
        partition=partition_record,
    )


def test_snapshot_summary_collector(data_file: DataFile) -> None:
ssc = SnapshotSummaryCollector()

Expand All @@ -175,7 +170,9 @@ def test_snapshot_summary_collector(data_file: DataFile) -> None:
}


def test_snapshot_summary_collector_with_partition(data_file_with_partition: DataFile) -> None:
def test_snapshot_summary_collector_with_partition() -> None:
# Given

ssc = SnapshotSummaryCollector()

assert ssc.build() == {}
Expand All @@ -185,19 +182,38 @@ def test_snapshot_summary_collector_with_partition(data_file_with_partition: Dat
NestedField(field_id=3, name="int_field", field_type=IntegerType(), required=False),
)
spec = PartitionSpec(PartitionField(source_id=3, field_id=1001, transform=IdentityTransform(), name='int_field'))
data_file_1 = DataFile(content=DataFileContent.DATA, record_count=100, file_size_in_bytes=1234, partition=Record(int_field=1))
data_file_2 = DataFile(content=DataFileContent.DATA, record_count=200, file_size_in_bytes=4321, partition=Record(int_field=2))
# When
ssc.add_file(data_file=data_file_1, schema=schema, partition_spec=spec)
ssc.remove_file(data_file=data_file_1, schema=schema, partition_spec=spec)
ssc.remove_file(data_file=data_file_2, schema=schema, partition_spec=spec)

# Then
assert ssc.build() == {
'added-files-size': '1234',
'removed-files-size': '5555',
'added-data-files': '1',
'deleted-data-files': '2',
'added-records': '100',
'deleted-records': '300',
'changed-partition-count': '2',
}

# When
ssc.set_partition_summary_limit(10)
ssc.add_file(data_file=data_file_with_partition, schema=schema, partition_spec=spec)
ssc.remove_file(data_file=data_file_with_partition, schema=schema, partition_spec=spec)

# Then
assert ssc.build() == {
'added-files-size': '1234',
'removed-files-size': '1234',
'removed-files-size': '5555',
'added-data-files': '1',
'deleted-data-files': '1',
'deleted-data-files': '2',
'added-records': '100',
'deleted-records': '100',
'changed-partition-count': '1',
'deleted-records': '300',
'changed-partition-count': '2',
'partitions.int_field=1': 'added-files-size=1234,removed-files-size=1234,added-data-files=1,deleted-data-files=1,added-records=100,deleted-records=100',
'partitions.int_field=2': 'removed-files-size=4321,deleted-data-files=1,deleted-records=200',
}


Expand Down

0 comments on commit cbf1afd

Please sign in to comment.