From 93e39fc833d17aa8cbcbee54937ab48e32937aeb Mon Sep 17 00:00:00 2001
From: Sung Yun <107272191+syun64@users.noreply.github.com>
Date: Wed, 10 Jul 2024 22:05:31 +0000
Subject: [PATCH] fix invalidation logic

---
 pyiceberg/io/pyarrow.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index ae7799cfde..da1f650eaf 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1860,6 +1860,7 @@ def data_file_statistics_from_parquet_metadata(
 
     col_aggs = {}
 
+    invalidate_col: Set[int] = set()
     for r in range(parquet_metadata.num_row_groups):
         # References:
         # https://github.com/apache/iceberg/blob/fc381a81a1fdb8f51a0637ca27cd30673bd7aad3/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java#L232
@@ -1875,8 +1876,6 @@ def data_file_statistics_from_parquet_metadata(
         else:
             split_offsets.append(data_offset)
 
-        invalidate_col: Set[int] = set()
-
         for pos in range(parquet_metadata.num_columns):
             column = row_group.column(pos)
             field_id = parquet_column_mapping[column.path_in_schema]