From 8c7d8553be03934da2995766be6cf109b3c12ce4 Mon Sep 17 00:00:00 2001 From: Venki Korukanti Date: Thu, 5 Jan 2023 10:20:18 -0800 Subject: [PATCH 1/3] wip GitOrigin-RevId: 1a371c60129b789b92f494a86e164e2dd18da03d --- .../spark/sql/delta/DeltaColumnMapping.scala | 3 +- .../apache/spark/sql/delta/DeltaConfig.scala | 10 + .../sql/delta/DeltaParquetFileFormat.scala | 23 +- .../org/apache/spark/sql/delta/DeltaUDF.scala | 6 + .../sql/delta/PreprocessTableWithDVs.scala | 182 ++++++++++++++++ .../org/apache/spark/sql/delta/Snapshot.scala | 3 +- .../sql/delta/SubqueryTransformerHelper.scala | 58 ++++++ .../apache/spark/sql/delta/TableFeature.scala | 13 +- .../spark/sql/delta/actions/actions.scala | 6 +- .../delta/commands/DeletionVectorUtils.scala | 86 ++++++++ .../perf/OptimizeMetadataOnlyDeltaQuery.scala | 9 +- .../sql/delta/stats/PrepareDeltaScan.scala | 33 +-- ...ccbf3f-b223-4581-9cd8-a7e569120ada.bin.crc | Bin 0 -> 12 bytes ...cbf9f8-7558-4a5a-b1e2-7432c30bf452.bin.crc | Bin 0 -> 12 bytes ...-be5d-eada75aceb4f-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-a5d5-621cd92cab11-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-85dd-e63ddc5931bf-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-8e26-18a77cf875f0-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-83a6-e7f57db28650-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-ba21-029309ab8736-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-a756-4d0d63ef9fcb-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-8f64-bf0bf072de5f-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-8816-19eccf7939f5-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-8cba-eba4eeb3b11a-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-8e51-ecb551c89430-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-b98d-b942db5b7359-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-8f60-2b76f1eb3f2c-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-b283-9e2308ef5487-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-a5d9-be1a2bc2431d-c000.snappy.parquet.crc | Bin 0 -> 16 bytes 
...-a231-1a2d08a83a0e-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-bb03-afba1e70ea29-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-a5db-d1b8d682153b-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-b709-e628f9bf2553-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-bb51-fc5b4dd0be10-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-a258-738e585a78a5-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...-8451-13edcd855b15-c000.snappy.parquet.crc | Bin 0 -> 16 bytes .../_delta_log/.00000000000000000000.crc.crc | Bin 0 -> 100 bytes .../_delta_log/.00000000000000000000.json.crc | Bin 0 -> 88 bytes .../_delta_log/.00000000000000000001.crc.crc | Bin 0 -> 104 bytes .../_delta_log/.00000000000000000001.json.crc | Bin 0 -> 52 bytes .../_delta_log/.00000000000000000002.crc.crc | Bin 0 -> 108 bytes .../_delta_log/.00000000000000000002.json.crc | Bin 0 -> 16 bytes .../_delta_log/.00000000000000000003.crc.crc | Bin 0 -> 112 bytes .../_delta_log/.00000000000000000003.json.crc | Bin 0 -> 64 bytes .../_delta_log/.00000000000000000004.crc.crc | Bin 0 -> 112 bytes .../_delta_log/.00000000000000000004.json.crc | Bin 0 -> 16 bytes .../_delta_log/00000000000000000000.crc | 1 + .../_delta_log/00000000000000000000.json | 23 ++ .../_delta_log/00000000000000000001.crc | 1 + .../_delta_log/00000000000000000001.json | 11 + .../_delta_log/00000000000000000002.crc | 1 + .../_delta_log/00000000000000000002.json | 2 + .../_delta_log/00000000000000000003.crc | 1 + .../_delta_log/00000000000000000003.json | 13 ++ .../_delta_log/00000000000000000004.crc | 1 + .../_delta_log/00000000000000000004.json | 2 + ...r_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin | Bin 0 -> 211 bytes ...r_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin | Bin 0 -> 259 bytes ...41cc-be5d-eada75aceb4f-c000.snappy.parquet | Bin 0 -> 600 bytes ...4782-a5d5-621cd92cab11-c000.snappy.parquet | Bin 0 -> 600 bytes ...4ba5-85dd-e63ddc5931bf-c000.snappy.parquet | Bin 0 -> 1008 bytes ...4770-8e26-18a77cf875f0-c000.snappy.parquet | Bin 0 -> 1008 bytes 
...4b79-83a6-e7f57db28650-c000.snappy.parquet | Bin 0 -> 1007 bytes ...4896-ba21-029309ab8736-c000.snappy.parquet | Bin 0 -> 1008 bytes ...43d8-a756-4d0d63ef9fcb-c000.snappy.parquet | Bin 0 -> 1008 bytes ...4f3e-8f64-bf0bf072de5f-c000.snappy.parquet | Bin 0 -> 1008 bytes ...4feb-8816-19eccf7939f5-c000.snappy.parquet | Bin 0 -> 1008 bytes ...419e-8cba-eba4eeb3b11a-c000.snappy.parquet | Bin 0 -> 1008 bytes ...4f3c-8e51-ecb551c89430-c000.snappy.parquet | Bin 0 -> 1008 bytes ...4f15-b98d-b942db5b7359-c000.snappy.parquet | Bin 0 -> 1008 bytes ...4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet | Bin 0 -> 1008 bytes ...43b8-b283-9e2308ef5487-c000.snappy.parquet | Bin 0 -> 1008 bytes ...4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet | Bin 0 -> 1008 bytes ...4e9f-a231-1a2d08a83a0e-c000.snappy.parquet | Bin 0 -> 1008 bytes ...41f5-bb03-afba1e70ea29-c000.snappy.parquet | Bin 0 -> 1007 bytes ...4553-a5db-d1b8d682153b-c000.snappy.parquet | Bin 0 -> 1008 bytes ...4e12-b709-e628f9bf2553-c000.snappy.parquet | Bin 0 -> 1008 bytes ...42cb-bb51-fc5b4dd0be10-c000.snappy.parquet | Bin 0 -> 1008 bytes ...4e4d-a258-738e585a78a5-c000.snappy.parquet | Bin 0 -> 1008 bytes ...47ec-8451-13edcd855b15-c000.snappy.parquet | Bin 0 -> 1007 bytes ...a98cdd-7843-470d-8897-708cdffa38c5.bin.crc | Bin 0 -> 12 bytes .../_delta_log/.00000000000000000000.crc.crc | Bin 0 -> 32 bytes .../_delta_log/.00000000000000000000.json.crc | Bin 0 -> 24 bytes .../_delta_log/.00000000000000000001.crc.crc | Bin 0 -> 32 bytes .../_delta_log/.00000000000000000001.json.crc | Bin 0 -> 24 bytes .../_delta_log/00000000000000000000.crc | 1 + .../_delta_log/00000000000000000000.json | 4 + .../_delta_log/00000000000000000001.crc | 1 + .../_delta_log/00000000000000000001.json | 3 + ...r_b6a98cdd-7843-470d-8897-708cdffa38c5.bin | Bin 0 -> 45 bytes ...-8b2d-ce6a1a94a34a-c000.snappy.parquet.crc | Bin 0 -> 16 bytes ...4437-8b2d-ce6a1a94a34a-c000.snappy.parquet | Bin 0 -> 818 bytes .../sql/delta/ActionSerializerSuite.scala | 106 ++++++++++ 
.../spark/sql/delta/CheckpointsSuite.scala | 5 +- .../delta/DeltaParquetFileFormatSuite.scala | 4 +- .../DeletionVectorsSuite.scala | 197 ++++++++++++++++++ 96 files changed, 766 insertions(+), 43 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala create mode 100644 core/src/main/scala/org/apache/spark/sql/delta/SubqueryTransformerHelper.scala create mode 100644 core/src/main/scala/org/apache/spark/sql/delta/commands/DeletionVectorUtils.scala create mode 100644 core/src/test/resources/delta/table-with-dv-large/.deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.deletion_vector_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00000-f5c18e7b-d1bf-4ba5-85dd-e63ddc5931bf-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00002-5459a52f-3fd3-4b79-83a6-e7f57db28650-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet.crc create mode 100644 
core/src/test/resources/delta/table-with-dv-large/.part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet.crc create mode 100644 
core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000000.crc.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000000.json.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000001.crc.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000001.json.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000002.crc.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000002.json.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000003.crc.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000003.json.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000004.crc.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000004.json.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000000.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000000.json create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.json create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.json create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000003.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000003.json create mode 100644 
core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000004.crc create mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000004.json create mode 100644 core/src/test/resources/delta/table-with-dv-large/deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin create mode 100644 core/src/test/resources/delta/table-with-dv-large/deletion_vector_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00000-f5c18e7b-d1bf-4ba5-85dd-e63ddc5931bf-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00002-5459a52f-3fd3-4b79-83a6-e7f57db28650-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet create mode 100644 
core/src/test/resources/delta/table-with-dv-large/part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-large/part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet create mode 100644 core/src/test/resources/delta/table-with-dv-small/.deletion_vector_b6a98cdd-7843-470d-8897-708cdffa38c5.bin.crc create mode 100644 core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000000.crc.crc create mode 100644 core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000000.json.crc create mode 100644 core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000001.crc.crc create mode 100644 
core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000001.json.crc create mode 100644 core/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000000.crc create mode 100644 core/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000000.json create mode 100644 core/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000001.crc create mode 100644 core/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000001.json create mode 100644 core/src/test/resources/delta/table-with-dv-small/deletion_vector_b6a98cdd-7843-470d-8897-708cdffa38c5.bin create mode 100644 core/src/test/resources/delta/table-with-dv-small/r4/.part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet.crc create mode 100644 core/src/test/resources/delta/table-with-dv-small/r4/part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet create mode 100644 core/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala diff --git a/core/src/main/scala/org/apache/spark/sql/delta/DeltaColumnMapping.scala b/core/src/main/scala/org/apache/spark/sql/delta/DeltaColumnMapping.scala index ca2f1d93447..03cc1bb7e59 100644 --- a/core/src/main/scala/org/apache/spark/sql/delta/DeltaColumnMapping.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/DeltaColumnMapping.scala @@ -52,7 +52,8 @@ trait DeltaColumnMappingBase extends DeltaLogging { protected val DELTA_INTERNAL_COLUMNS: Set[String] = (CDCReader.CDC_COLUMNS_IN_DATA ++ Seq( CDCReader.CDC_COMMIT_VERSION, - CDCReader.CDC_COMMIT_TIMESTAMP) + CDCReader.CDC_COMMIT_TIMESTAMP, + DeltaParquetFileFormat.IS_ROW_DELETED_COLUMN_NAME) ).map(_.toLowerCase(Locale.ROOT)).toSet val supportedModes: Set[DeltaColumnMappingMode] = diff --git a/core/src/main/scala/org/apache/spark/sql/delta/DeltaConfig.scala b/core/src/main/scala/org/apache/spark/sql/delta/DeltaConfig.scala index 745acae4aa9..a5446ac981c 100644 --- 
a/core/src/main/scala/org/apache/spark/sql/delta/DeltaConfig.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/DeltaConfig.scala @@ -395,6 +395,16 @@ trait DeltaConfigsBase extends DeltaLogging { helpMessage = "needs to be a boolean.", minimumProtocolVersion = Some(AppendOnlyTableFeature.minProtocolVersion)) + /** + * Whether commands modifying this Delta table are allowed to create new deletion vectors. + */ + val ENABLE_DELETION_VECTORS_CREATION = buildConfig[Boolean]( + key = "enableDeletionVectors", + defaultValue = "false", + fromString = _.toBoolean, + validationFunction = _ => true, + helpMessage = "needs to be a boolean.", + minimumProtocolVersion = Some(DeletionVectorsTableFeature.minProtocolVersion)) /** * Whether this table will automatically optimize the layout of files during writes. diff --git a/core/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala b/core/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala index 2e26a161bb1..15993598d65 100644 --- a/core/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/DeltaParquetFileFormat.scala @@ -16,6 +16,8 @@ package org.apache.spark.sql.delta +import java.net.URI + import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal @@ -49,12 +51,21 @@ class DeltaParquetFileFormat( val isSplittable: Boolean = true, val disablePushDowns: Boolean = false, val tablePath: Option[String] = None, - val broadcastDvMap: Option[Broadcast[Map[String, DeletionVectorDescriptor]]] = None, + val broadcastDvMap: Option[Broadcast[Map[URI, DeletionVectorDescriptor]]] = None, val broadcastHadoopConf: Option[Broadcast[SerializableConfiguration]] = None) extends ParquetFileFormat { // Validate either we have all arguments for DV enabled read or none of them. 
- require(!(broadcastHadoopConf.isDefined ^ broadcastDvMap.isDefined ^ tablePath.isDefined ^ - !isSplittable ^ disablePushDowns)) + if (broadcastHadoopConf.isDefined) { + require( + broadcastHadoopConf.isDefined && broadcastDvMap.isDefined && + tablePath.isDefined && !isSplittable && disablePushDowns, + "Wrong arguments for Delta table scan with deletion vectors") + } else { + require( + broadcastHadoopConf.isEmpty && broadcastDvMap.isEmpty && + tablePath.isEmpty && isSplittable && !disablePushDowns, + "Wrong arguments for Delta table scan with no deletion vectors") + } val columnMappingMode: DeltaColumnMappingMode = metadata.columnMappingMode val referenceSchema: StructType = metadata.schema @@ -147,7 +158,7 @@ class DeltaParquetFileFormat( def copyWithDVInfo( tablePath: String, - broadcastDvMap: Broadcast[Map[String, DeletionVectorDescriptor]], + broadcastDvMap: Broadcast[Map[URI, DeletionVectorDescriptor]], broadcastHadoopConf: Broadcast[SerializableConfiguration]): DeltaParquetFileFormat = { new DeltaParquetFileFormat( metadata, @@ -169,10 +180,10 @@ class DeltaParquetFileFormat( isRowDeletedColumnIdx: Int, useOffHeapBuffers: Boolean): Iterator[Object] = { val filePath = partitionedFile.filePath - val absolutePath = new Path(filePath).toString + val pathUri = new Path(filePath).toUri // Fetch the DV descriptor from the broadcast map and create a row index filter - val dvDescriptor = broadcastDvMap.get.value.get(absolutePath) + val dvDescriptor = broadcastDvMap.get.value.get(pathUri) val rowIndexFilter = DeletedRowsMarkingFilter.createInstance( dvDescriptor.getOrElse(DeletionVectorDescriptor.EMPTY), broadcastHadoopConf.get.value.value, diff --git a/core/src/main/scala/org/apache/spark/sql/delta/DeltaUDF.scala b/core/src/main/scala/org/apache/spark/sql/delta/DeltaUDF.scala index fd4e302ff24..fb5647b82fb 100644 --- a/core/src/main/scala/org/apache/spark/sql/delta/DeltaUDF.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/DeltaUDF.scala @@ -47,6 +47,9 @@ 
object DeltaUDF { def booleanFromMap(f: Map[String, String] => Boolean): UserDefinedFunction = createUdfFromTemplateUnsafe(booleanFromMapTemplate, f, udf(f)) + def booleanFromByte(x: Byte => Boolean): UserDefinedFunction = + createUdfFromTemplateUnsafe(booleanFromByteTemplate, x, udf(x)) + private lazy val stringFromStringTemplate = udf[String, String](identity).asInstanceOf[SparkUserDefinedFunction] @@ -64,6 +67,9 @@ object DeltaUDF { private lazy val booleanFromMapTemplate = udf((_: Map[String, String]) => true).asInstanceOf[SparkUserDefinedFunction] + private lazy val booleanFromByteTemplate = + udf((_: Byte) => true).asInstanceOf[SparkUserDefinedFunction] + /** * Return a `UserDefinedFunction` for the given `f` from `template` if * `INTERNAL_UDF_OPTIMIZATION_ENABLED` is enabled. Otherwise, `orElse` will be called to create a diff --git a/core/src/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala b/core/src/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala new file mode 100644 index 00000000000..2b3996b707e --- /dev/null +++ b/core/src/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala @@ -0,0 +1,182 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import java.net.URI + +import org.apache.spark.sql.delta.RowIndexFilter +import org.apache.spark.sql.delta.DeltaParquetFileFormat._ +import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor +import org.apache.spark.sql.delta.commands.DeletionVectorUtils.deletionVectorsReadable +import org.apache.spark.sql.delta.files.{TahoeFileIndex, TahoeLogFileIndex} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.DeltaFileOperations.absolutePath + +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.sql.{Column, SparkSession} +import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.SerializableConfiguration + +/** + * Plan transformer to inject a filter that removes the rows marked as deleted according to + * deletion vectors. For tables with no deletion vectors, this transformation has no effect. + * + * It modifies the plan for tables with deletion vectors as follows: + * Before rule: -> Delta Scan (key, value). + * - Here we are reading `key`, `value` columns from the Delta table + * After rule: + * -> + * Project(key, value) -> + * Filter (udf(__skip_row == 0)) -> + * Delta Scan (key, value, __skip_row) + * - Here we insert a new column `__skip_row` in Delta scan. This value is populated by the + * Parquet reader using the DV corresponding to the Parquet file read + * (See [[DeltaParquetFileFormat]]) and it contains 0 if we want to keep the row. 
+ * The scan created also disables Parquet file splitting and filter pushdowns, because + * in order to generate the __skip_row, we need to read the rows in a file consecutively + * to generate the row index. This is a cost we need to pay until we upgrade to latest + * Apache Spark which contains Parquet reader changes that automatically generate the + * row_index irrespective of the file splitting and filter pushdowns. + * - The scan created also contains a broadcast variable of Parquet File -> DV File map. + * The Parquet reader created uses this map to find the DV file corresponding to the data file. + * - Filter created keeps only the rows with __skip_row equal to 0 + * - And at the end we have a Project to keep the plan node output same as before the rule is + * applied. + */ +trait PreprocessTableWithDVs extends SubqueryTransformerHelper { + def preprocessTablesWithDVs(plan: LogicalPlan): LogicalPlan = { + transformWithSubqueries(plan) { + case ScanWithDeletionVectors(dvScan) => dvScan + } + } +} + +object ScanWithDeletionVectors { + def unapply(a: LogicalRelation): Option[LogicalPlan] = a match { + case scan @ LogicalRelation( + relation @ HadoopFsRelation( + index: TahoeFileIndex, _, _, _, format: DeltaParquetFileFormat, _), _, _, _) => + dvEnabledScanFor(scan, relation, format, index) + case _ => None + } + + def dvEnabledScanFor( + scan: LogicalRelation, + hadoopRelation: HadoopFsRelation, + fileFormat: DeltaParquetFileFormat, + index: TahoeFileIndex): Option[LogicalPlan] = { + // If the table has no DVs enabled, no change needed + if (!deletionVectorsReadable(index.protocol, index.metadata)) return None + + require(!index.isInstanceOf[TahoeLogFileIndex], + "Cannot work with a non-pinned table snapshot of the TahoeFileIndex") + + // If the table has no DVs enabled, no change needed + if (!deletionVectorsReadable(index.protocol, index.metadata)) return None + + // See if the relation is already modified to include DV reads as part of + // a previous 
invocation of this rule on this table + if (fileFormat.hasDeletionVectorMap()) return None + + // See if any files actually have a DV + val spark = SparkSession.getActiveSession.get + val filePathToDVBroadcastMap = createBroadcastDVMap(spark, index) + if (filePathToDVBroadcastMap.value.isEmpty) return None + + // Get the list of columns in the output of the `LogicalRelation` we are + // trying to modify. At the end of the plan, we need to return a + // `LogicalRelation` that has the same output as this `LogicalRelation` + val planOutput = scan.output + + val newScan = createScanWithSkipRowColumn( + spark, scan, fileFormat, index, filePathToDVBroadcastMap, hadoopRelation) + + // On top of the scan add a filter that filters out the rows which have + // skip row column value non-zero + val rowIndexFilter = createRowIndexFilterNode(newScan) + + // Now add a project on top of the row index filter node to + // remove the skip row column + Some(Project(planOutput, rowIndexFilter)) + } + /** + * Helper method that creates a new `LogicalRelation` for existing scan that outputs + * an extra column which indicates whether the row needs to be skipped or not. + */ + private def createScanWithSkipRowColumn( + spark: SparkSession, + inputScan: LogicalRelation, + fileFormat: DeltaParquetFileFormat, + tahoeFileIndex: TahoeFileIndex, + filePathToDVBroadcastMap: Broadcast[Map[URI, DeletionVectorDescriptor]], + hadoopFsRelation: HadoopFsRelation): LogicalRelation = { + // Create a new `LogicalRelation` that has modified `DeltaFileFormat` and output with an extra + // column to indicate whether to skip the row or not + + // Add a column for SKIP_ROW to the base output. Value of 0 means the row needs be kept, any + // other values mean the row needs be skipped. 
+ val skipRowField = IS_ROW_DELETED_STRUCT_FIELD + val newScanOutput = inputScan.output :+ + AttributeReference(skipRowField.name, skipRowField.dataType)() + val newScanSchema = StructType(inputScan.schema).add(skipRowField) + + val hadoopConfBroadcast = spark.sparkContext.broadcast( + new SerializableConfiguration(tahoeFileIndex.deltaLog.newDeltaHadoopConf())) + + val newFileFormat = fileFormat.copyWithDVInfo( + tahoeFileIndex.path.toString, filePathToDVBroadcastMap, hadoopConfBroadcast) + val newRelation = hadoopFsRelation.copy( + fileFormat = newFileFormat, + dataSchema = newScanSchema)(hadoopFsRelation.sparkSession) + + // Create a new scan LogicalRelation + inputScan.copy(relation = newRelation, output = newScanOutput) + } + + private def createRowIndexFilterNode(newScan: LogicalRelation): Filter = { + val skipRowColumnRefs = newScan.output.filter(_.name == IS_ROW_DELETED_COLUMN_NAME) + require(skipRowColumnRefs.size == 1, + s"Expected only one column with name=$IS_ROW_DELETED_COLUMN_NAME") + val skipRowColumnRef = skipRowColumnRefs.head + + val keepRow = DeltaUDF.booleanFromByte( _ == RowIndexFilter.KEEP_ROW_VALUE) + .asNondeterministic() // To avoid constant folding the filter based on stats. + + val filterExp = keepRow(new Column(skipRowColumnRef)).expr + Filter(filterExp, newScan) + } + + private def createBroadcastDVMap( + spark: SparkSession, + tahoeFileIndex: TahoeFileIndex): Broadcast[Map[URI, DeletionVectorDescriptor]] = { + // Given there is no way to find the final filters, just select all files in the + // file index and create the DV map. 
+ val filesWithDVs = + tahoeFileIndex.matchingFiles(Seq(TrueLiteral), Seq(TrueLiteral)) + .filter(_.deletionVector != null) + val filePathToDVMap = filesWithDVs + .map(x => + absolutePath(tahoeFileIndex.path.toString, x.path).toUri -> x.deletionVector) + .toMap + spark.sparkContext.broadcast(filePathToDVMap) + } +} diff --git a/core/src/main/scala/org/apache/spark/sql/delta/Snapshot.scala b/core/src/main/scala/org/apache/spark/sql/delta/Snapshot.scala index 32f13c571d0..a0d1295f0ab 100644 --- a/core/src/main/scala/org/apache/spark/sql/delta/Snapshot.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/Snapshot.scala @@ -137,7 +137,8 @@ class Snapshot( col("add.modificationTime"), col("add.dataChange"), col(ADD_STATS_TO_USE_COL_NAME).as("stats"), - col("add.tags") + col("add.tags"), + col("add.deletionVector") ))) .withColumn("remove", when( col("remove.path").isNotNull, diff --git a/core/src/main/scala/org/apache/spark/sql/delta/SubqueryTransformerHelper.scala b/core/src/main/scala/org/apache/spark/sql/delta/SubqueryTransformerHelper.scala new file mode 100644 index 00000000000..ef18ea59d5f --- /dev/null +++ b/core/src/main/scala/org/apache/spark/sql/delta/SubqueryTransformerHelper.scala @@ -0,0 +1,58 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.delta + +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery, SupportsSubquery} + +/** + * Trait to allow processing '''all''' instances of a certain node in a subquery. + * + * Pattern matching in transform cannot be used because of the short-circuiting + * nature of the pattern matching. It stops matching after one instance of + * the certain node is found and remaining nodes in the subquery plan will + * not be transformed. + */ +trait SubqueryTransformerHelper { + + /** + * Transform all nodes matched by the rule in the query plan rooted at given `plan`. + * It requires that the given plan has already gone through [[OptimizeSubqueries]] and the + * root node denoting a subquery is removed and optimized appropriately. + */ + def transformWithSubqueries(plan: LogicalPlan) + (rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = { + require(!isSubqueryRoot(plan)) + transformSubqueries(plan, rule) transform (rule) + } + + /** Is the given plan a subquery root. 
*/ + def isSubqueryRoot(plan: LogicalPlan): Boolean = { + plan.isInstanceOf[Subquery] || plan.isInstanceOf[SupportsSubquery] + } + + private def transformSubqueries( + plan: LogicalPlan, + rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = { + import org.apache.spark.sql.delta.implicits._ + + plan transformAllExpressionsUp { + case subquery: SubqueryExpression => + subquery.withNewPlan(transformWithSubqueries(subquery.plan)(rule)) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/sql/delta/TableFeature.scala b/core/src/main/scala/org/apache/spark/sql/delta/TableFeature.scala index 9e2c9f00e99..3644133ac71 100644 --- a/core/src/main/scala/org/apache/spark/sql/delta/TableFeature.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/TableFeature.scala @@ -196,7 +196,8 @@ object TableFeature { IdentityColumnsTableFeature, GeneratedColumnsTableFeature, InvariantsTableFeature, - ColumnMappingTableFeature) + ColumnMappingTableFeature, + DeletionVectorsTableFeature) if (DeltaUtils.isTesting) { features ++= Set( TestLegacyWriterFeature, @@ -295,6 +296,16 @@ object IdentityColumnsTableFeature } } +object DeletionVectorsTableFeature + extends ReaderWriterFeature(name = "deletionVectors") + with FeatureAutomaticallyEnabledByMetadata { + override def metadataRequiresFeatureToBeEnabled( + metadata: Metadata, + spark: SparkSession): Boolean = { + DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.fromMetaData(metadata) + } +} + /** * Features below are for testing only, and are being registered to the system only in the testing * environment. See [[TableFeature.allSupportedFeaturesMap]] for the registration. 
diff --git a/core/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala b/core/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala index 37a5102d814..93e4841d77b 100644 --- a/core/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/actions/actions.scala @@ -389,7 +389,8 @@ case class AddFile( modificationTime: Long, override val dataChange: Boolean, stats: String = null, - override val tags: Map[String, String] = null + override val tags: Map[String, String] = null, + deletionVector: DeletionVectorDescriptor = null ) extends FileAction { require(path.nonEmpty) @@ -526,7 +527,8 @@ case class RemoveFile( partitionValues: Map[String, String] = null, @JsonDeserialize(contentAs = classOf[java.lang.Long]) size: Option[Long] = None, - override val tags: Map[String, String] = null + override val tags: Map[String, String] = null, + deletionVector: DeletionVectorDescriptor = null ) extends FileAction { override def wrap: SingleAction = SingleAction(remove = this) diff --git a/core/src/main/scala/org/apache/spark/sql/delta/commands/DeletionVectorUtils.scala b/core/src/main/scala/org/apache/spark/sql/delta/commands/DeletionVectorUtils.scala new file mode 100644 index 00000000000..c1f80cbe44e --- /dev/null +++ b/core/src/main/scala/org/apache/spark/sql/delta/commands/DeletionVectorUtils.scala @@ -0,0 +1,86 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.commands + +import org.apache.spark.sql.delta.{DeletionVectorsTableFeature, DeltaConfigs, Snapshot, SnapshotDescriptor} +import org.apache.spark.sql.delta.actions.{Metadata, Protocol} +import org.apache.spark.sql.delta.files.TahoeFileIndex + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.datasources.FileIndex +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.internal.SQLConf + +trait DeletionVectorUtils { + + /** + * Run a query on the delta log to determine if the given snapshot contains no deletion vectors. + * Return `false` if it does contain deletion vectors. + */ + def isTableDVFree(spark: SparkSession, snapshot: Snapshot): Boolean = { + val dvsReadable = deletionVectorsReadable(snapshot) + + if (dvsReadable) { + val dvCount = snapshot.allFiles + .filter(col("deletionVector").isNotNull) + .limit(1) + .count() + + dvCount == 0L + } else { + true + } + } + + /** + * Returns true if persistent deletion vectors are enabled and + * readable with the current reader version. 
+ */ + def fileIndexSupportsReadingDVs(fileIndex: FileIndex): Boolean = fileIndex match { + case index: TahoeFileIndex => deletionVectorsReadable(index) + case _ => false + } + + def deletionVectorsWritable( + snapshot: SnapshotDescriptor, + newProtocol: Option[Protocol] = None, + newMetadata: Option[Metadata] = None): Boolean = { + def protocol = newProtocol.getOrElse(snapshot.protocol) + def metadata = newMetadata.getOrElse(snapshot.metadata) + protocol.isFeatureEnabled(DeletionVectorsTableFeature) && + DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.fromMetaData(metadata) + } + + def deletionVectorsReadable( + snapshot: SnapshotDescriptor, + newProtocol: Option[Protocol] = None, + newMetadata: Option[Metadata] = None): Boolean = { + deletionVectorsReadable( + newProtocol.getOrElse(snapshot.protocol), + newMetadata.getOrElse(snapshot.metadata)) + } + + def deletionVectorsReadable( + protocol: Protocol, + metadata: Metadata): Boolean = { + protocol.isFeatureEnabled(DeletionVectorsTableFeature) && + metadata.format.provider == "parquet" // DVs are only supported on parquet tables. + } +} + +// To access utilities from places where mixing in a trait is inconvenient. 
+object DeletionVectorUtils extends DeletionVectorUtils diff --git a/core/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala b/core/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala index f5bb592f53f..057f7bcbace 100644 --- a/core/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/perf/OptimizeMetadataOnlyDeltaQuery.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.delta.DeltaTable import org.apache.spark.sql.delta.files.TahoeLogFileIndex import org.apache.spark.sql.delta.stats.DeltaScanGenerator -import org.apache.spark.sql.functions.{col, count, sum, when} +import org.apache.spark.sql.functions.{coalesce, col, count, lit, sum, when} trait OptimizeMetadataOnlyDeltaQuery { def optimizeQueryWithMetadata(plan: LogicalPlan): LogicalPlan = { @@ -48,10 +48,13 @@ trait OptimizeMetadataOnlyDeltaQuery { /** Return the number of rows in the table or `None` if we cannot calculate it from stats */ private def extractGlobalCount(tahoeLogFileIndex: TahoeLogFileIndex): Option[Long] = { - // TODO Update this to work with DV (https://github.com/delta-io/delta/issues/1485) + // account for deleted rows according to deletion vectors + val dvCardinality = coalesce(col("deletionVector.cardinality"), lit(0)) + val numLogicalRecords = (col("stats.numRecords") - dvCardinality).as("numLogicalRecords") + val row = getDeltaScanGenerator(tahoeLogFileIndex).filesWithStatsForScan(Nil) .agg( - sum("stats.numRecords"), + sum(numLogicalRecords), // Calculate the number of files missing `numRecords` count(when(col("stats.numRecords").isNull, 1))) .first diff --git a/core/src/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala b/core/src/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala index 36b597d6108..30fda5c8847 100644 --- 
a/core/src/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala @@ -51,7 +51,8 @@ import org.apache.spark.sql.types.StructType trait PrepareDeltaScanBase extends Rule[LogicalPlan] with PredicateHelper with DeltaLogging - with OptimizeMetadataOnlyDeltaQuery { self: PrepareDeltaScan => + with OptimizeMetadataOnlyDeltaQuery + with PreprocessTableWithDVs { self: PrepareDeltaScan => /** * Tracks the first-access snapshots of other logs planned by this rule. The snapshots are @@ -139,33 +140,15 @@ trait PrepareDeltaScanBase extends Rule[LogicalPlan] // delta scans. val deltaScans = new mutable.HashMap[LogicalPlan, DeltaScan]() - /* - * We need to first prepare the scans in the subqueries of a node. Otherwise, because of the - * short-circuiting nature of the pattern matching in the transform method, if a - * PhysicalOperation node is matched, its subqueries that may contain other PhysicalOperation - * nodes will be skipped. 
- */ - def transformSubqueries(plan: LogicalPlan): LogicalPlan = { - import org.apache.spark.sql.delta.implicits._ - - plan transformAllExpressionsUp { - case subquery: SubqueryExpression => - subquery.withNewPlan(transform(subquery.plan)) - } - } - - def transform(plan: LogicalPlan): LogicalPlan = - transformSubqueries(plan) transform { - case scan @ DeltaTableScan(canonicalizedPlanWithRemovedProjections, filters, fileIndex, + transformWithSubqueries(plan) { + case scan @ DeltaTableScan(planWithRemovedProjections, filters, fileIndex, limit, delta) => val scanGenerator = getDeltaScanGenerator(fileIndex) - val preparedScan = deltaScans.getOrElseUpdate(canonicalizedPlanWithRemovedProjections, + val preparedScan = deltaScans.getOrElseUpdate(planWithRemovedProjections, filesForScan(scanGenerator, limit, filters, delta)) val preparedIndex = getPreparedIndex(preparedScan, fileIndex) optimizeGeneratedColumns(scan, preparedIndex, filters, limit, delta) } - - transform(plan) } protected def optimizeGeneratedColumns( @@ -204,9 +187,9 @@ trait PrepareDeltaScanBase extends Rule[LogicalPlan] val shouldPrepareDeltaScan = ( spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_STATS_SKIPPING) ) - if (shouldPrepareDeltaScan) { + val updatedPlan = if (shouldPrepareDeltaScan) { // Should not be applied to subqueries to avoid duplicate delta jobs. - val isSubquery = plan.isInstanceOf[Subquery] || plan.isInstanceOf[SupportsSubquery] + val isSubquery = isSubqueryRoot(plan) // Should not be applied to DataSourceV2 write plans, because they'll be planned later // through a V1 fallback and only that later planning takes place within the transaction. 
val isDataSourceV2 = plan.isInstanceOf[V2WriteCommand] @@ -217,7 +200,6 @@ trait PrepareDeltaScanBase extends Rule[LogicalPlan] if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED)) { plan = optimizeQueryWithMetadata(plan) } - prepareDeltaScan(plan) } else { // If this query is running inside an active transaction and is touching the same table @@ -233,6 +215,7 @@ trait PrepareDeltaScanBase extends Rule[LogicalPlan] // It will fall back to just partition pruning at planning time. plan } + preprocessTablesWithDVs(updatedPlan) } /** diff --git a/core/src/test/resources/delta/table-with-dv-large/.deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin.crc b/core/src/test/resources/delta/table-with-dv-large/.deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin.crc new file mode 100644 index 0000000000000000000000000000000000000000..3f9d864cd60ea5d77ec01061376d6754f007314f GIT binary patch literal 12 TcmYc;N@ieSU}Cu6apgDw6$1o4 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.deletion_vector_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin.crc b/core/src/test/resources/delta/table-with-dv-large/.deletion_vector_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin.crc new file mode 100644 index 0000000000000000000000000000000000000000..94dcca7d8bfbf1b8e820305cd4a38b82e6a27e9c GIT binary patch literal 12 TcmYc;N@ieSU}9)~aHRtP6kG%P literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..400a49907b2d88ddd9b27a008eef05368a71e30b GIT binary patch literal 16 XcmYc;N@ieSU}Bj5bxI9$hv-`XCUym6 literal 0 HcmV?d00001 diff --git 
a/core/src/test/resources/delta/table-with-dv-large/.part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..9b96bf257cec251894d424489f5b699327e17539 GIT binary patch literal 16 XcmYc;N@ieSU}CtNVwcC literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..93d5490dc8b99e758770c122ddd480408f815072 GIT binary patch literal 16 XcmYc;N@ieSU}AW@-D#~z;&N{QCVmAg literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..3154da5403f4dc80d2287b0d8d393493b7c0969e GIT binary patch literal 16 XcmYc;N@ieSU}CsXze3@9z6KKjCGZ5o literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..f8791d82db7fbbcc9b24c70d3fbbbfe3a3dd658c GIT binary patch literal 16 XcmYc;N@ieSU}A7jxsdwT`DYpcB~Asp literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet.crc 
b/core/src/test/resources/delta/table-with-dv-large/.part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..1d9af4a5bf8a6358d5652770b652dccf9a48dda4 GIT binary patch literal 16 XcmYc;N@ieSU}CVkl4QT*%dV#YC8P#5 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..583a2ab3e1e4098ea39846e9c2005d95a1807715 GIT binary patch literal 16 XcmYc;N@ieSU}DIVe9>B5;nfZRBI5+> literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..f39bfec8dbf30a05c1a118a2d1c26218171c8f18 GIT binary patch literal 16 XcmYc;N@ieSU}7i}i(9&5GP@xFAo~PO literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..74f50ae4c9fb009a3ed80012927d1726bb8f751c GIT binary patch literal 16 XcmYc;N@ieSU}Bgw|Dd>t&J|+-BK-u5 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet.crc new file mode 100644 index 
0000000000000000000000000000000000000000..cf4304bacaa68a8ca93a59519eaa9055ea00fbbd GIT binary patch literal 16 XcmYc;N@ieSU}BIv9N~8`taU#CA{GT5 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..2a3bbd225ca212f0a83d522f69c784c9e69297b4 GIT binary patch literal 16 XcmYc;N@ieSU}6ZE6?1M!^~6K~B_suO literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..155e26861bd621ba80d95fa10031a7cc26cbcbf4 GIT binary patch literal 16 XcmYc;N@ieSU}E^bzG~famc0o8DX|67 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..04f2d213b20b04be5d0b80ae9762a4f4c74ca68c GIT binary patch literal 16 XcmYc;N@ieSU}Et0bUOFCaZfD(BY6dc literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..2d577971e110f626089fc613a32d5fdd2806dfcf GIT binary patch literal 16 XcmYc;N@ieSU}E@K{`3E(p8~M}F17~K literal 0 HcmV?d00001 diff --git 
a/core/src/test/resources/delta/table-with-dv-large/.part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..897bcadfb0d5cc6787469338e361e7ca8352174e GIT binary patch literal 16 XcmYc;N@ieSU}E^563gN3bJ`F9BMk(O literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..6822a31b3d304f6904b564ed161c328ba696620d GIT binary patch literal 16 XcmYc;N@ieSU}7k8blh#d((VEPAtMCb literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..d034d5de558962f71f26664dc31ca5b8e8226d66 GIT binary patch literal 16 XcmYc;N@ieSU}E4@pU16xXSEOj9AE?5 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..39969d9e6de8a2f3e56b46f5b97938e7c4b2ba1a GIT binary patch literal 16 XcmYc;N@ieSU}DhKpJGvW`>qxMAQ}Y2 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000000.crc.crc b/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000000.crc.crc 
new file mode 100644 index 0000000000000000000000000000000000000000..4ba40ce79ae2470e47b095bfc804e4d98f2e9c9b GIT binary patch literal 100 zcmV-q0Gt0~a$^7h00IDYL*12uLIo(IXiNrgNyR>@r^CBT^VJ54ryPEnJOkbKO3E!v zKL1Q0E2}^JhlLy>uJ4!a0n^w>KjtWO$g3s8%L!3&dtKYnvATuu@hIqJXf?8V34wyL&I z@usjOX_4_;~~-|`9DgjPAgOG$&2ZJX^un~0$DuXHR9N;cR)YLw!<&?z+3Ep+D)8@Mgl Sits4vM=}i#Wkzqr$b#dfJ~I6P literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000003.json.crc b/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000003.json.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b50092592977039cfd6008982e9d46f6b9b7a5a GIT binary patch literal 64 zcmV-G0KflZa$^7h00IDZk3Z;$D))2(>wGcs++fVXaR*8T%Kn%raaQJx+|2#t4^I1R WR^JeiU8lyiQ5-=LEa$FD<2^M literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000004.crc.crc b/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000004.crc.crc new file mode 100644 index 0000000000000000000000000000000000000000..4902114e72b434b36c386190a20992276c5373b3 GIT binary patch literal 112 zcmV-$0FVD;a$^7h00IEA!EoxVCdcelw&r;ye|?@$jvB?d5m!mUoheujCIwVv)g=w= zm&hC~k>;~~-|`9DgjPAgOG$&2ZJX^un~0z-fk@@Y1L9(hJ;L&r","txnId":"f0ddc566-dfe6-4bd8-b264-ce100f9362ef"}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1674064767118}} 
+{"add":{"path":"part-00000-f5c18e7b-d1bf-4ba5-85dd-e63ddc5931bf-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064769860,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":4},\"maxValues\":{\"value\":1967},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860000","MIN_INSERTION_TIME":"1674064769860000","MAX_INSERTION_TIME":"1674064769860000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064769860,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":18},\"maxValues\":{\"value\":1988},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860001","MIN_INSERTION_TIME":"1674064769860001","MAX_INSERTION_TIME":"1674064769860001","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00002-5459a52f-3fd3-4b79-83a6-e7f57db28650-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770019,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":16},\"maxValues\":{\"value\":1977},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860002","MIN_INSERTION_TIME":"1674064769860002","MAX_INSERTION_TIME":"1674064769860002","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770019,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":5},\"maxValues\":{\"value\":1982},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860003","MIN_INSERTION_TIME":"1674064769860003","MAX_INSERTION_TIME":"1674064769860003","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"add":{"path":"part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770100,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":1},\"maxValues\":{\"value\":1999},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860004","MIN_INSERTION_TIME":"1674064769860004","MAX_INSERTION_TIME":"1674064769860004","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770100,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":8},\"maxValues\":{\"value\":1914},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860005","MIN_INSERTION_TIME":"1674064769860005","MAX_INSERTION_TIME":"1674064769860005","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770207,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":30},\"maxValues\":{\"value\":1992},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860006","MIN_INSERTION_TIME":"1674064769860006","MAX_INSERTION_TIME":"1674064769860006","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770207,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":40},\"maxValues\":{\"value\":1990},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860007","MIN_INSERTION_TIME":"1674064769860007","MAX_INSERTION_TIME":"1674064769860007","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"add":{"path":"part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770265,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":13},\"maxValues\":{\"value\":1897},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860008","MIN_INSERTION_TIME":"1674064769860008","MAX_INSERTION_TIME":"1674064769860008","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770265,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":12},\"maxValues\":{\"value\":1987},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860009","MIN_INSERTION_TIME":"1674064769860009","MAX_INSERTION_TIME":"1674064769860009","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770319,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":19},\"maxValues\":{\"value\":1993},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860010","MIN_INSERTION_TIME":"1674064769860010","MAX_INSERTION_TIME":"1674064769860010","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770319,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":11},\"maxValues\":{\"value\":1984},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860011","MIN_INSERTION_TIME":"1674064769860011","MAX_INSERTION_TIME":"1674064769860011","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"add":{"path":"part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770372,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":33},\"maxValues\":{\"value\":1995},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860012","MIN_INSERTION_TIME":"1674064769860012","MAX_INSERTION_TIME":"1674064769860012","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770372,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":20},\"maxValues\":{\"value\":1974},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860013","MIN_INSERTION_TIME":"1674064769860013","MAX_INSERTION_TIME":"1674064769860013","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770427,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":3},\"maxValues\":{\"value\":1996},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860014","MIN_INSERTION_TIME":"1674064769860014","MAX_INSERTION_TIME":"1674064769860014","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770427,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":1997},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860015","MIN_INSERTION_TIME":"1674064769860015","MAX_INSERTION_TIME":"1674064769860015","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"add":{"path":"part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770477,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":2},\"maxValues\":{\"value\":1986},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860016","MIN_INSERTION_TIME":"1674064769860016","MAX_INSERTION_TIME":"1674064769860016","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770476,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":22},\"maxValues\":{\"value\":1998},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860017","MIN_INSERTION_TIME":"1674064769860017","MAX_INSERTION_TIME":"1674064769860017","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770529,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":6},\"maxValues\":{\"value\":1983},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860018","MIN_INSERTION_TIME":"1674064769860018","MAX_INSERTION_TIME":"1674064769860018","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770528,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":36},\"maxValues\":{\"value\":1969},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860019","MIN_INSERTION_TIME":"1674064769860019","MAX_INSERTION_TIME":"1674064769860019","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.crc 
b/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.crc new file mode 100644 index 00000000000..cb531c70afc --- /dev/null +++ b/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.crc @@ -0,0 +1 @@ +{"txnId":"5327cd46-c25b-4127-88fd-5b3c2402691b","tableSizeBytes":20157,"numFiles":20,"numDeletedRecordsOpt":5,"numDeletionVectorsOpt":5,"numMetadata":1,"numProtocol":1,"setTransactions":[],"metadata":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1674064767118},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[20157,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[15,5,0,0,0,0,0,0,0,0]},"allFiles":[{"path":"part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770207,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":30},\"maxValues\":{\"value\":1992},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860006","MIN_INSERTION_TIME":"1674064769860006","MAX_INSERTION_TIME":"1674064769860006","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770427,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":1997},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860015","MIN_INSERTION_TIME":"1674064769860015","MAX_INSERTION_TIME":"1674064769860015","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":43,"sizeInBytes":34,"cardinality":1}},{"path":"part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770372,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":33},\"maxValues\":{\"value\":1995},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860012","MIN_INSERTION_TIME":"1674064769860012","MAX_INSERTION_TIME":"1674064769860012","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":1,"sizeInBytes":34,"cardinality":1}},{"p
ath":"part-00000-f5c18e7b-d1bf-4ba5-85dd-e63ddc5931bf-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064769860,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":4},\"maxValues\":{\"value\":1967},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860000","MIN_INSERTION_TIME":"1674064769860000","MAX_INSERTION_TIME":"1674064769860000","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770019,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":5},\"maxValues\":{\"value\":1982},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860003","MIN_INSERTION_TIME":"1674064769860003","MAX_INSERTION_TIME":"1674064769860003","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":169,"sizeInBytes":34,"cardinality":1}},{"path":"part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770477,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":2},\"maxValues\":{\"value\":1986},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860016","MIN_INSERTION_TIME":"1674064769860016","MAX_INSERTION_TIME":"1674064769860016","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770427,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":3},\"maxValues\":{\"value\":1996},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860014","MIN_INSERTION_TIME":"1674064769860014","MAX_INSERTION_TIME":"1674064769860014","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00010-7f35fa1
b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770319,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":19},\"maxValues\":{\"value\":1993},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860010","MIN_INSERTION_TIME":"1674064769860010","MAX_INSERTION_TIME":"1674064769860010","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770476,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":22},\"maxValues\":{\"value\":1998},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860017","MIN_INSERTION_TIME":"1674064769860017","MAX_INSERTION_TIME":"1674064769860017","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770207,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":40},\"maxValues\":{\"value\":1990},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860007","MIN_INSERTION_TIME":"1674064769860007","MAX_INSERTION_TIME":"1674064769860007","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00002-5459a52f-3fd3-4b79-83a6-e7f57db28650-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770019,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":16},\"maxValues\":{\"value\":1977},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860002","MIN_INSERTION_TIME":"1674064769860002","MAX_INSERTION_TIME":"1674064769860002","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770100,"dataChange":false,"stats":
"{\"numRecords\":100,\"minValues\":{\"value\":8},\"maxValues\":{\"value\":1914},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860005","MIN_INSERTION_TIME":"1674064769860005","MAX_INSERTION_TIME":"1674064769860005","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064769860,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":18},\"maxValues\":{\"value\":1988},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860001","MIN_INSERTION_TIME":"1674064769860001","MAX_INSERTION_TIME":"1674064769860001","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":85,"sizeInBytes":34,"cardinality":1}},{"path":"part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770529,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":6},\"maxValues\":{\"value\":1983},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860018","MIN_INSERTION_TIME":"1674064769860018","MAX_INSERTION_TIME":"1674064769860018","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770265,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":12},\"maxValues\":{\"value\":1987},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860009","MIN_INSERTION_TIME":"1674064769860009","MAX_INSERTION_TIME":"1674064769860009","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770528,"dataChange":false,"stats":"{\"numRecords\":100,\"
minValues\":{\"value\":36},\"maxValues\":{\"value\":1969},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860019","MIN_INSERTION_TIME":"1674064769860019","MAX_INSERTION_TIME":"1674064769860019","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":127,"sizeInBytes":34,"cardinality":1}},{"path":"part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770319,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":11},\"maxValues\":{\"value\":1984},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860011","MIN_INSERTION_TIME":"1674064769860011","MAX_INSERTION_TIME":"1674064769860011","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770265,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":13},\"maxValues\":{\"value\":1897},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860008","MIN_INSERTION_TIME":"1674064769860008","MAX_INSERTION_TIME":"1674064769860008","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770100,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":1},\"maxValues\":{\"value\":1999},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860004","MIN_INSERTION_TIME":"1674064769860004","MAX_INSERTION_TIME":"1674064769860004","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770372,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\
":20},\"maxValues\":{\"value\":1974},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860013","MIN_INSERTION_TIME":"1674064769860013","MAX_INSERTION_TIME":"1674064769860013","OPTIMIZE_TARGET_SIZE":"268435456"}}]} diff --git a/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.json b/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..d486e40ba0f --- /dev/null +++ b/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1674064789962,"operation":"DELETE","operationParameters":{"predicate":"[\"(spark_catalog.delta.`/private/var/folders/g3/hcd28y8s71s0yh7whh443wz00000gp/T/spark-f3dd4a29-dc57-42eb-b752-84179135f5b8`.value IN (0, 180, 300, 700, 1800))\"]"},"readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"0","numCopiedRows":"0","numDeletionVectorsAdded":"5","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"0","executionTimeMs":"12828","numDeletedRows":"5","scanTimeMs":"12323","numAddedFiles":"0","rewriteTimeMs":"487"},"engineInfo":"Databricks-Runtime/","txnId":"5327cd46-c25b-4127-88fd-5b3c2402691b"}} +{"remove":{"path":"part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet","deletionTimestamp":1674064789957,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860001","MIN_INSERTION_TIME":"1674064769860001","MAX_INSERTION_TIME":"1674064769860001","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"remove":{"path":"part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet","deletionTimestamp":1674064789957,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860003","MIN_INSERTION_TIME":"1674064769860003","MAX_INSERTION_TIME":"1674064769860003","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet","deletionTimestamp":1674064789957,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860012","MIN_INSERTION_TIME":"1674064769860012","MAX_INSERTION_TIME":"1674064769860012","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet","deletionTimestamp":1674064789957,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860015","MIN_INSERTION_TIME":"1674064769860015","MAX_INSERTION_TIME":"1674064769860015","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet","deletionTimestamp":1674064789957,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1007,"tags":{"INSERTION_TIME":"1674064769860019","MIN_INSERTION_TIME":"1674064769860019","MAX_INSERTION_TIME":"1674064769860019","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"add":{"path":"part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064769860,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":18},\"maxValues\":{\"value\":1988},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860001","MIN_INSERTION_TIME":"1674064769860001","MAX_INSERTION_TIME":"1674064769860001","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":85,"sizeInBytes":34,"cardinality":1}}} +{"add":{"path":"part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770019,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":5},\"maxValues\":{\"value\":1982},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860003","MIN_INSERTION_TIME":"1674064769860003","MAX_INSERTION_TIME":"1674064769860003","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":169,"sizeInBytes":34,"cardinality":1}}} +{"add":{"path":"part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770372,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":33},\"maxValues\":{\"value\":1995},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860012","MIN_INSERTION_TIME":"1674064769860012","MAX_INSERTION_TIME":"1674064769860012","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":1,"sizeInBytes":34,"cardinality":1}}} 
+{"add":{"path":"part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770427,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":1997},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860015","MIN_INSERTION_TIME":"1674064769860015","MAX_INSERTION_TIME":"1674064769860015","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":43,"sizeInBytes":34,"cardinality":1}}} +{"add":{"path":"part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770528,"dataChange":true,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":36},\"maxValues\":{\"value\":1969},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860019","MIN_INSERTION_TIME":"1674064769860019","MAX_INSERTION_TIME":"1674064769860019","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":127,"sizeInBytes":34,"cardinality":1}}} diff --git a/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.crc b/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.crc new file mode 100644 index 00000000000..4a5c7870eff --- /dev/null +++ b/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.crc @@ -0,0 +1 @@ 
+{"txnId":"fb0a7015-0096-4d74-821b-3507163c17fa","tableSizeBytes":20757,"numFiles":21,"numDeletedRecordsOpt":5,"numDeletionVectorsOpt":5,"numMetadata":1,"numProtocol":1,"setTransactions":[],"metadata":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1674064767118},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[20757,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[16,5,0,0,0,0,0,0,0,0]},"allFiles":[{"path":"part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770207,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":30},\"maxValues\":{\"value\":1992},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860006","MIN_INSERTION_TIME":"1674064769860006","MAX_INSERTION_TIME":"1674064769860006","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet","partitionValues":{},"size":600,"modificationTime":1674064791593,"dataChange":false,"stats":"{\"numRecords\":2,\"minValues\":{\"value\":300},\"maxValues\":{\"value\":700},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064791593000","MIN_INSERTION_TIME":"1674064791593000","MAX_INSERTION_TIME":"1674064791593000","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770427,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":1997},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860015","MIN_INSERTION_TIME":"1674064769860015","MAX_INSERTION_TIME":"1674064769860015","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":43,"sizeInBytes":34,"cardinality":1}},{"path":"part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770372,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":33},\"maxValues\":{\"value\":1995},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860012","MIN_INSERTION_TIME":"1674064769860012",
"MAX_INSERTION_TIME":"1674064769860012","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":1,"sizeInBytes":34,"cardinality":1}},{"path":"part-00000-f5c18e7b-d1bf-4ba5-85dd-e63ddc5931bf-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064769860,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":4},\"maxValues\":{\"value\":1967},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860000","MIN_INSERTION_TIME":"1674064769860000","MAX_INSERTION_TIME":"1674064769860000","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770019,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":5},\"maxValues\":{\"value\":1982},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860003","MIN_INSERTION_TIME":"1674064769860003","MAX_INSERTION_TIME":"1674064769860003","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":169,"sizeInBytes":34,"cardinality":1}},{"path":"part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770477,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":2},\"maxValues\":{\"value\":1986},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860016","MIN_INSERTION_TIME":"1674064769860016","MAX_INSERTION_TIME":"1674064769860016","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770427,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":3},\"maxValues\":{\"value\":1996},\"nullCount\":{\"value\":0},\"tightBou
nds\":true}","tags":{"INSERTION_TIME":"1674064769860014","MIN_INSERTION_TIME":"1674064769860014","MAX_INSERTION_TIME":"1674064769860014","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770319,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":19},\"maxValues\":{\"value\":1993},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860010","MIN_INSERTION_TIME":"1674064769860010","MAX_INSERTION_TIME":"1674064769860010","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770476,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":22},\"maxValues\":{\"value\":1998},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860017","MIN_INSERTION_TIME":"1674064769860017","MAX_INSERTION_TIME":"1674064769860017","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770207,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":40},\"maxValues\":{\"value\":1990},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860007","MIN_INSERTION_TIME":"1674064769860007","MAX_INSERTION_TIME":"1674064769860007","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00002-5459a52f-3fd3-4b79-83a6-e7f57db28650-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770019,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":16},\"maxValues\":{\"value\":1977},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860002","MIN_INSERTION_TIME":"1674064769860002","MAX_INSERTION_TIME":"1674064769860002","OPTIMI
ZE_TARGET_SIZE":"268435456"}},{"path":"part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770100,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":8},\"maxValues\":{\"value\":1914},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860005","MIN_INSERTION_TIME":"1674064769860005","MAX_INSERTION_TIME":"1674064769860005","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064769860,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":18},\"maxValues\":{\"value\":1988},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860001","MIN_INSERTION_TIME":"1674064769860001","MAX_INSERTION_TIME":"1674064769860001","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":85,"sizeInBytes":34,"cardinality":1}},{"path":"part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770265,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":12},\"maxValues\":{\"value\":1987},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860009","MIN_INSERTION_TIME":"1674064769860009","MAX_INSERTION_TIME":"1674064769860009","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770529,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":6},\"maxValues\":{\"value\":1983},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860018","MIN_INSERTION_TIME":"1674064769860018","MAX_INSERTION_TIME":"1674064769860018","OPTIMIZE_TARGET_SIZE":"268435
456"}},{"path":"part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770528,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":36},\"maxValues\":{\"value\":1969},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860019","MIN_INSERTION_TIME":"1674064769860019","MAX_INSERTION_TIME":"1674064769860019","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":127,"sizeInBytes":34,"cardinality":1}},{"path":"part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770265,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":13},\"maxValues\":{\"value\":1897},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860008","MIN_INSERTION_TIME":"1674064769860008","MAX_INSERTION_TIME":"1674064769860008","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770319,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":11},\"maxValues\":{\"value\":1984},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860011","MIN_INSERTION_TIME":"1674064769860011","MAX_INSERTION_TIME":"1674064769860011","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770100,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":1},\"maxValues\":{\"value\":1999},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860004","MIN_INSERTION_TIME":"1674064769860004","MAX_INSERTION_TIME":"1674064769860004","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-
00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770372,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":20},\"maxValues\":{\"value\":1974},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860013","MIN_INSERTION_TIME":"1674064769860013","MAX_INSERTION_TIME":"1674064769860013","OPTIMIZE_TARGET_SIZE":"268435456"}}]} diff --git a/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.json b/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..752769167f3 --- /dev/null +++ b/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1674064791599,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"2","numOutputBytes":"600"},"engineInfo":"Databricks-Runtime/","txnId":"fb0a7015-0096-4d74-821b-3507163c17fa"}} +{"add":{"path":"part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet","partitionValues":{},"size":600,"modificationTime":1674064791593,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"value\":300},\"maxValues\":{\"value\":700},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064791593000","MIN_INSERTION_TIME":"1674064791593000","MAX_INSERTION_TIME":"1674064791593000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000003.crc b/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000003.crc new file mode 100644 index 00000000000..ffd7469f97c --- /dev/null +++ b/core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000003.crc @@ -0,0 +1 
@@ +{"txnId":"d50de74c-f8c8-4e68-b120-267504045e9d","tableSizeBytes":20757,"numFiles":21,"numDeletedRecordsOpt":11,"numDeletionVectorsOpt":8,"numMetadata":1,"numProtocol":1,"setTransactions":[],"metadata":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true"},"createdTime":1674064767118},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[21,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[20757,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[13,8,0,0,0,0,0,0,0,0]},"allFiles":[{"path":"part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770207,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":30},\"maxValues\":{\"value\":1992},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860006","MIN_INSERTION_TIME":"1674064769860006","MAX_INSERTION_TIME":"1674064769860006","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064770427,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":1997},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860015","MIN_INSERTION_TIME":"1674064769860015","MAX_INSERTION_TIME":"1674064769860015","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":43,"sizeInBytes":34,"cardinality":1}},{"path":"part-00000-f5c18e7b-d1bf-4ba5-85dd-e63ddc5931bf-c000.snappy.parquet","partitionValues":{},"size":1008,"modificationTime":1674064769860,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":4},\"maxValues\":{\"value\":1967},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064769860000","MIN_INSERTION_TIME":"1674064769860000","MAX_INSERTION_TIME":"1674064769860000","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet","partitionValues":{},"size":1007,"modificationTime":1674064770427,"dataChange":false,"stats":"{\"numRecords\":100,\"minValues\":{\"value\":3},\"maxValues\":{\"value\":1996},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064769860014","MIN_INSERTION_TIME":"1674064769860
014","MAX_INSERTION_TIME":"1674064769860014","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"UGM+pBY.mtVeP","txnId":"d50de74c-f8c8-4e68-b120-267504045e9d"}} +{"remove":{"path":"part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":600,"tags":{"INSERTION_TIME":"1674064791593000","MIN_INSERTION_TIME":"1674064791593000","MAX_INSERTION_TIME":"1674064791593000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860001","MIN_INSERTION_TIME":"1674064769860001","MAX_INSERTION_TIME":"1674064769860001","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":85,"sizeInBytes":34,"cardinality":1}}} +{"remove":{"path":"part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860012","MIN_INSERTION_TIME":"1674064769860012","MAX_INSERTION_TIME":"1674064769860012","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":1,"sizeInBytes":34,"cardinality":1}}} +{"remove":{"path":"part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1007,"tags":{"INSERTION_TIME":"1674064769860014","MIN_INSERTION_TIME":"1674064769860014","MAX_INSERTION_TIME":"1674064769860014","OPTIMIZE_TARGET_SIZE":"268435456"}}} 
+{"remove":{"path":"part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1008,"tags":{"INSERTION_TIME":"1674064769860018","MIN_INSERTION_TIME":"1674064769860018","MAX_INSERTION_TIME":"1674064769860018","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"remove":{"path":"part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet","deletionTimestamp":1674064797399,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":1007,"tags":{"INSERTION_TIME":"1674064769860019","MIN_INSERTION_TIME":"1674064769860019","MAX_INSERTION_TIME":"1674064769860019","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"m9JzgVlI!?Oy<+3x+y^b","offset":127,"sizeInBytes":34,"cardinality":1}}} +{"add":{"path":"part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet","partitionValues":{},"size":600,"modificationTime":1674064791593,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"value\":300},\"maxValues\":{\"value\":700},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1674064791593000","MIN_INSERTION_TIME":"1674064791593000","MAX_INSERTION_TIME":"1674064791593000","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"UGM+pBY.mtVeP","txnId":"4016704a-babb-44a8-ae8b-c53303465742"}} +{"add":{"path":"part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet","partitionValues":{},"size":600,"modificationTime":1674064798704,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"value\":900},\"maxValues\":{\"value\":1567},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1674064798704000","MIN_INSERTION_TIME":"1674064798704000","MAX_INSERTION_TIME":"1674064798704000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git 
a/core/src/test/resources/delta/table-with-dv-large/deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin b/core/src/test/resources/delta/table-with-dv-large/deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin new file mode 100644 index 0000000000000000000000000000000000000000..e729ea4e69624842fbff7202c22046e04d342ef7 GIT binary patch literal 211 zcmZQ%U|>+Xc-b+Xc-b*|gEkPFuGfeaRs!ohb28Qp;wrnNFel`Yeh86o5cL4yZk|7NM literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet b/core/src/test/resources/delta/table-with-dv-large/part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e3cb2ff9e2da3be8e8ee489037b85d4dc0bf0e36 GIT binary patch literal 600 zcmZWn&1%~~5T3|N(4iDcv&#wu6@r=s?5!Ooc5Qkw^w5@KLJ2*jDPbgyToL^^`bi1K zw?0ZABM(*R3v^_=C!cn{nQvyknVrkom!SaR2v_|5^Ypvp2$SKFumGN)8vxMh^bP=? z*tB05wlZ79PwNndEg3k{5_2|M-23pXT4H&3TZKO;Lt(JY4{{3PdH>)p2UwvB|7O`k2|My!nM0mxqm;#hta8aT`BT5^8OI&PF*lnaeZ zOR%HgZqYRT^2ueMm0uQqqrGed_4hTXS9?mf!@S3uu6PgaJl8v6{j79% zlej}YJkx%^l^F1f?yCM^FgSBhf+k5L7e|v|5XbSD#sT^f#ux-R8IOj3;71qumaQ;A Ricf9F;TtgddoKB={sEqkon8O{ literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet b/core/src/test/resources/delta/table-with-dv-large/part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8f856e0a33603a1cdeccaff55c59c73ccf10ab53 GIT binary patch literal 600 zcmZWnO>5gg5S_}35v3GLv&#wu5rUco?5!Lnc5Qkw^w5@KLJ2*jDPbh79Fdje=p!W< z-}0CG2ipFyj%@ej)6ScDGxKJ4E@oec44A`Q_WRG@U(X$866`Vy;N_VC0G-ZL2jHj+ zV81eKVYY~#*1-&$6L7f2=47SnT@TX_&kbh$mD9h|we11qeu7zEc3yDjl z-f|;D)@i1$id>1;vrLs~gm1qT)y*tlD6J|G&R&963ydB=%`Yb-0U*R4;X##ETIAv& zkY~~dak9s#?zN;H+&8M0W${yHs>T>=6xS=Seq1VV>Q!~wM77uVl2qxq@}}QcUXEEC 
z+H8sU;modehZ<{bLQ{Xiv#@m=9R1OWH#_&(%)YAS>P7 zB<@fSPh~J@C5E)3yK3P3{;7K$HmQzX98E$$Ns_S|hZw||U>M?LJQ@aJ5TD~avZ4@G Qa$-9U-GE8ob3r%t4F|9LL|TI_^OTwHan$A(_aObXr~iq;$v#Dv=QL5N7F{-M6+myQ?#^nurv2 z>5z1*lN}-tCK%MEE=BYQiHL4>DMY-bH&4=`&#Ij~eEELg_xt<3_nS8}{XGLMy3kiN z{k5il`?0bZi|eZq98Hf2@wjaJQKU==u^NeNMTOXk2=N`B02TOaL@7K0CRzW4Ka=%h z)xd{<4?dAy1y9J&kX56X!5UdFd>n5by-l8l@8C_N=g|vff598PHt+@iIh-TA0^Wk( zcpGpzbtm9HcoFYC9Dp74FM4jn4=aRtMxhUCH6iX+3h@daW&HsC5uHaLMZW-}tUu86 z65fITuum0w1HA+;Q*)N}5(w!V0rOxO|0el8bPYO04}b;qGUx#(@-wfZ!OQUb0JAgxORYxte98*B0LG`F*u!tKmeN#h~jHU}!!G!e3M}pK1m6LiD z^H|iZn37&m31c;4#jSWf?>uU?$Ao6>Cwi2~Sl5hAtv+^oZ~MKo4AHQA*xriM_*B?a z2;*s`ZW!B|?ub(3YNS3!_hAMaHm!m`=Ew=j49@O2PMFQ9p+a7nwLLlIXH)WIUm+Oj z$&V>d1;UIUXzGA%=C4$s=*%!IPL?I84vsnBk+Gg;-O1H1d`teuy+QnY(xB>R=we5l5hkC88W zo^(BGXjV=3|84E=*}1uEbHY4wV3NIuIVVFk$UT&vXJ~)0=A2)*|0H!SPD`tkY%Y!H zaz*1ob0U#wt!r?LSub5DJG$INCX?w@ovuu#Wk$NL?CR`jNxI2&yF7(Txl&~st$3Vo Lp^8`3&%gc;QGnWo literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet b/core/src/test/resources/delta/table-with-dv-large/part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c57f1d7871d7ec7b084de6c24f047345e020b90f GIT binary patch literal 1008 zcmZWo-%Au>6umRL<0cwp%`gK4EfdNL>$L0Yni7>%pb%kVJ*Xky?0&N?&d%!0tYM1? zK`((I6=o3xKJ??I2!eVkB80YoAnHjFAqs;qski8?yFKm0;hcNUz4QH;nUTTKgiP`? zNq#NKH=aIP+NaB90gfbxiS9P+t_Tf8R2GCOk?1t!ra*KF-WtGwA=De-ugGo#_kj2C z-oV?BnQN$Xz!%i3fCTR%@?PLKpa5@?Uq|*F`VaDl&>P@yKot5NxP+cD-~#Hu==%h| z1Kxu70{MOD6wrpc6L8_RqGuJ}3iK234lvwA6p@J5Lqzk?CxNr5f1>`5{vXiI&>z83 zWCXnqwt-WPM6ZCC$d-UsWMTLZphLhY@Bv;M;K5%69>bf!%uRU1z#{Ys@DA1zp?jd4 z(bo>Xg?bD4jF}|rcKFNC%g{%_Mer}=4p=)NYHDNUw4llcWl>X5=t@}LmY^y|Y^cSb zVcztVpT5HLOcr6bi)v_&IYmxNS1mq-;vcw$u?mUVVIZErp_c zi&ZPEhEE00#A{q@k`-lFl1-`%mW7TmrVn95ld4wmvL>5h*n_#Z9W%(~`FO$Q>cPQ0 z_i|}=^jyK88g#SV;XbK)-GIZhEImJbeyl&G5~*rk!yNbgT)|aCNwdd{liFF=;m
  • 3WxGZ0Z_HGPGtuyBa zJj*>8Zqad=<)8<{JZAiVtT9kpSh&1E>fySbR1b581$+$mP&v6Bw;4H z&16?)ix#eE+V6_TpTEt+Zp(*wz#u6h#z^RHfb`3n{p8<2OxQW%1*D=ey_Jd+yC-Wawg2 zBI$)Ft%=fy)rWCSstGVeDN1xFynR~;6H!eNssho;5YcCF4f+_Of%p^gFCrm6Log8+ z(Q^|xAO^ibOe4o^a=1B zRFL0*e*k;|9mwCHZv$M1zJ&M$btSA>hzlaos}`bn5!?lgf)(VOU>!P)c)ykCHE2g3 z1G}Ivz&{TC9z?)m;9)Nvej0oS3HZ0*FTwu?81g@eQ}AZN3^6%=rWP zJ?LfhJOv}*E#T;Bhra>67kotBC*`5d)Zk1&Dw z&~VY48Y<+t%{@}IrUskmSZ4n0`O$R0LZm271+(1s^2LJEk}?h(2~xV6D!dunz+23e zxo^0|Im6*T%QBxSlNH^cFL8ZP_k6dU@%3)q%5yvG>4Os$y}+DcY3IeT(x}WAe4gVj zj6&JAnQ5a2&AeOxzpXx4sa7vnN!iylN&PaXn9oP?4#9oK8-w*YZme;WSkg%L8mXRO zL=#Um?e!!QiQZVJSYSW|shza8Q&dXm zgwzVD5(@~H3N{8hSK@C$D2M@dC_79H-6#_SJU3y=@bupI-go!iU7URGW?CkBOO&@o z`P=^I$A@*fF2E6GiRfwD{u7~%i0XoHFGloHAd2cdAOY{-ErWleWgD1)X6V^~KLH*@ z{Re&l+zxLOdI|bFvQFq7U<>tMWV_I7@DG9C!0Q0N1;^pNLY@S7195nLsCR%odL95f z@E!rrkRJ!{!E>O$pvQyW1!sX9(1RHKP$ZhcfDgbg;0|gYY4AnxC-A9uqOZ`K=ubjl zfPN000|t@50ImSPF>eytDs&Gr8~Qc+`%qs4*T8vr8^AgcqFzVeQDpalc6cX%ZFt9k zS>(g;*WfW=5_K2+Rp|HNb6^3`x?-9dEhbPD$T3Ypp(~QSCqhL=axxw)Fn@k7DBR{A zlVdPX#?+1_=2kf=bwSfLy;suFIwFy%H4{08NckZudt$G2zOj8{mO}jp!x|N);Zwn$ zM9RxfSyA>y*`#P;UFemt`Unm*qiPkuWU>W@Gnfa*F+<1W)0Hw;yK^4*odP?3wGzzb z$|dgdfKFLnttW3 z>G6;iS;*9x6(d|;;KsNSgnqRU8cD-;xLXX2@mniKnR!uk@M1`solZIACGNv0S6!D` zE_$$-CyoEx8sjUq+H8%~o|cm|E^~>6dF|9LL|T?YK}xVl&OaQp==bpwq6aZdwcE~Jwv-@^ioZZ!(*&@VC z1wnMFgWfuH@*oO95gr8V(9)s021U@J4%sQML7!1O3H#yq{r$eb_ulWl-5njelGcU3 ztm&^c{l&}q$9ofcBSJ;f<3cPnZO%uUgbl+#>EOedU=TFPp)TE!grOHx|Qf!Z! 
zvD?zCD`7MuR>Dej#1pjk#f4_=IC>Pz_^ufnT1WiU_V(Ln8KQIRuVph4VOrH%(UH@V8Jw+goS;-zk>bbz~vI`~U<$Zhb`m9}%<J z?o=w(lk9ZsB`=?p{R3{QP$*F|9RAIy;~or(dczD%xD2^ytkbTWt}9&@IwXZ!h=D2gX7}y3IJ>Jevx8_K{0g~!;H%&iycqO1%$k5+1%9IM8E^}Di#Q5?4!#@oL2m(1fiU_e z;N3)y0at*tzz$}OB5wj4$d3SB@V_F~56HUK8oUH3SeD7Uioj4qY)63#jOeKjZ;rdO zGhXJBD01AN$z8J6w!oc=Ahjy#hHgYc21@%wr09(dlaW$CD(a3B37x2qub-t-ckQrx zgwyb;)Sgv++|E>WS78G^IYU7xtk)Zh#rneCcBSBC!+aoR$8xz`QY3Aj$nqSwZJtUF#1nQR+s_A~GBy{v9^EkT MEp*_^AH%=?4@W-OTmS$7 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet b/core/src/test/resources/delta/table-with-dv-large/part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ea18948b1ada2743ac13b807cde5cad64ce46459 GIT binary patch literal 1008 zcmZWoPe>F|82@I~aX>F&Gt9tH+d!h=PP=ZpX$1*E1|n8MV4A(zeX}jj&g#spVI~Ac ze{O+f6hsix!65ph(4o4vM(8Di=#&Lkc<3MtsMqC^fJBrfqFXhab3zRfl>}iFz;yU!K*+F z`Umg=wG+TGU^z}q9f!n}0_<8U> z)Ow+R0VViV^w$7G@Con})GqU&BvL+j6n8Dl{#|(0LK2&hI+SHTh zUM|h{^cDQk9yiM!?vtulEpT|2rKgVe4|a8`M5hU2S%G==eU#cjqX#^hRgC{Y3s#MsdVPt zfM>afz%4osvmEqbF>f>eZ)kh-&KlJaE^vw#oc9m4wzRR)XYy=dho(S(_3 zH(e=Evgkczd+PD(0L_ly!Dlaoe_2JY}(DhS|)rSXZhuk+hPT4z>>}Z82`Q QYPybZp%FjP0RHR$0J4(b00000 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet b/core/src/test/resources/delta/table-with-dv-large/part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e640051c8e43e444af6de696c87dd647ae83b6ef GIT binary patch literal 1008 zcmZXTO-K}B7{}jTbzDfI$Qfo}Lm5aEbXwiilpr*VA}LX$FvH&2eRtiQ-PN60MMMY+ zBIs5aL6@Qq1xADi4|UC@4oL{!13^gW1Ho&*RXch3^Z$6B=lz=9fu6xuUFh#LeOc4L zZ@yoNMD*fJ~2H-*XtJ!~16s1eb(G*mfgzF6Z=%EH8$pBPtJsfP(qgw3jH z=@peQN+Bz1MH~3$VXHkNG;6=2hbb99G-FF^h@9D5es3>BH12M;=h%&(3Tp~sysXv@ 
zV_VZ5Q64OX8X`>Ba-dPu$`{g(oRplw**%UEWU^``pHt?so~$Zl67pn!-XH79rIn|A zVMY%Ocq%OuvzM+6cXgP&yLq5tS{3|EK4(_Nol{Osm`w*d{3*}jEvAYpa0>YuC#wRP zlz}uy=j>p1QrX?M9~6p-z;3ov8RaE?yZh#xos-#eY4^pb?005zfl8|aL9Xa|()H+J zST)=Km$kd+=I3wC3$yOPN%k(&Cj&LiJCyG;vVX9aY@vStCiSgOYnu~qDId}0i6;D( zSS;37-{=-IUb0?xbh)uqD%GhvT^UcxlyqI$)!ES+cjL)+c^Z{)rAjqf(I~%#8a~kg G|Mh=#h}#_i literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet b/core/src/test/resources/delta/table-with-dv-large/part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c95daac1d1831a1479c15be5abc7677a41161532 GIT binary patch literal 1008 zcmZXTPe>GD7{=dObsR+rshMVMplv`=(P`IBH^qX54h0b+5}}5Cv-|BjI6JE|v!-o& zi;e|R9;}N{ho}zGp?jB&Afl*?WK?ts^AJK96+Po_r#<|5p7(j*@6UX$qp7jDO!8w< zekRI)udTgr*5$H*M3g0>)w-SALLCv61>r-4s0qGFAZkbS5_%Qd3Vnk7HQ2<=Thy({ zlklIw5BN9eEh3*py#XJEe*nJ^y+^2LKs#6i8hUZ)A7}?yfj=1T2fc+gJ@8k+E_!#s z7i2lG0>28qf!=-SQ_zTP9ke0qftHZDBGD`0fRErjNP?4%MDz7TKO2a)p}&!BUZdg&~0Ql;lE?gcJy@kZ^%=i4ZY`p zkbMQa$c{jNVfHe-iTN4SEzl8g0%&a!O|1`K8WJ8RM>GY6u1NBh2p0yur(66v=FLv~ z=^379as**VL~U4LPLY#R7BpScJ0%^X0}_c^wIfH+Qa+2yme?tcRJO02rBK)2VU-G} z;irNVJFnW!w&7F*I3{RE}m*vCO-ix7AowVJ6=eUQ! 
zEjkXf9L!)b?=}8!YYdl`moF}pdbs8$m5({a0zQs+2)}2dda=H|7p;CtG;YTG%|u^# zMGH?f>-WWCvHob6RkWQ{ZcW$m NTWGF|9RBUI_|outrTQ*P{^{-i&^q!_w6<~JFD}@GBGOX z6m*c#ONX*U5)p(!T`RPO2)cHO{#^_Y5lUT*zS-SQd-(Xi-}n38d%yQ~cDVmsLL~92 zAifmD^|jy6cBx`91V<2~L`yYWi=i4KDuzM>VWMBaE7V;fA|LiKpb`2T_&#_C{1CVf z96r>bR+a}z(@T8Tn`=u7SMYK{s3hMh2_W$vwTicF{G%9+7eX}?T(V5R61f9E$OQuZ3->Xqvh?(XGzpr zI;>pbG<+)9lSo>w6D4U&5OoR`7DFvjtlo=(#$~18;1zV0J^}~9cLBU=efr+%wzKSoaW6=ajj2tJ;zUbTDz7txs`FXzVmaM&GJF3^kV2#lBVtP z9Cu*YzGX4PLI#U@yY_!ut#59A{=z)T%~g|>FLRQ4d<5?h+-IyZSV!K8Rc;bX=!q^p z*%6Fr;E5*Pj(9xY6>Bwo)5^qHPs)gAv)NwWYp`U7Wtm~HRBulrX(Th<>QA)|+ z>Ld<2xQXH*>Lk`l9fY`4)U9<;7Zt>*4i2K9v~|kx<@^18f8Y1+_uk!|n>wG?h5lI6 zS2g|q%~#L%#PwDLN7G|M-0s@E73mT}v?7s(s1OH`@4+X$iHHyn(P!a%)W7Nb=)>si z=xKT>cnyAncLN-x{}jHA_o1DKN8tzfE&M3R;XT5egZsgL^b0skZ=GB(^-FXDzJR|7 zj#D264?u|j6=d;mlTYA%0Kd>5;hr8LRzb5{h@WsB?At{az6K{@3w}$#r3vu|E>VBS zKLbCu`lb{ZA)U)tj)@*sx;zUPdLbG-{dX$#&MKgYA1F>V<+iyS15QAF}+pc&Tp9*_& z#Jjt6!`RexN3<8VA_Fm2_cBnYUt!nmUoFh(3n2C-=u;e-1#Zp6sj$ga# zR8%MnGL&X+#SWM2%FfzB=r{7AowSQ(+zn&lqa=ef8u*igmq+c`;J+DVT%nW6TGE?2Y| z45d=3k-kB>)Y^9lF%PL!Qc@Zv95>uvHiO zb4`Dx=}Rwveyxh@l?aZe$Aq|DwQ?&`C4{I%A~)fO5g|Un!&oNqlc*52#6Iyy;%9Ia z#PJr;--%y?t>7BCgLeg0Z2ab`m(Hi`j z+&1_vm?w7+{{XxT{|4~zFQNy*1N8M8A>LIB@mCY#7v695a;U+}P0WDr;B~=g@pgm# z;01aoz7C(G?ke6E;w8Kv@HF0II0GDV@6iUiTJ%%+E^Oj|0(0p5=!5VgI#2Bo@n%>D zr-+;3FXY;Z`@sVB2|N>SL0em+mRTKc9}1i4QOiKYjhMcy5rsz6i8_Bmmc~c@^d*&- zdKB|O)U273URepF60zb|JQ0gi+8z^{wVu(Vq>OKxv8*LxN7k-i>t%?B)n;poZv0ia zlczq~q#MSHraK}$ScxQJoZi7eBc@d>jX82cGJ~@^juYhaYPeWX=JwvaD&^9$<#f>> z?JbNcPx-=(Z#dwoF`1q@d1k1)!(<2MhJqqG(j1w!gP93s_t<_=DyIXx$lK!<_dutQzeW- z+4H39QNv-?WdFa`?wOsNyD%rry&EQ3drY4U)DU|J-!r^ESaZHqzy6Z?R;RViNi~Nf zx~yp2Z%!tYZS@UqIp<~SWkXNDOVX*yu7kc>&wCQWf+pcMs?A_bwXwr`R*F=X=T%%oCE zT?v98N7x#!&T-n;K*GLpU$lSq0d zN*kiIzV>(NuqM?6IHD9JdepMFB(xAwO%Uz}h@PN41pO9BKo1K{yTafv4JW9$-SB0>1-a z2krp{=vT;Zf}bP1fI1C-9au#zf;C_Zy$x^{xh?oR&?(d&FaZ4;*oW7S%p&j@`ZM?| zcm+5L?4iF6ZxUDrbgW&5{}Ft`-w&R8B%ms-UAP&@Z%6@EhS21ov@4?WjqrsIx5Avl 
zq?@_Hi%bf@JQ+~hW|&>&B-aF0Q?>4(hG>6~M77b80%*y24xX)VUq4Hxo`b{c z3a8;y!Jb6&YP%%Md!l5Jzpy5B2eJA%2AWXRvXe7dg<%HcU>w8C7x`GZ#Fb;|B6so` zcIHyqolKW<+~zJRTGIlX=U8U;;^om)LLpL=roaq$+?q$x_KgEN;fL{cHeL#$?<&t!f&+ Mg%12gBlxfX0|dz2O8@`> literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet b/core/src/test/resources/delta/table-with-dv-large/part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..680b7371166d4fb4d52087c817c5e7666bc3a211 GIT binary patch literal 1008 zcmZXTPe>F|9LL|TIc^X^*bFnakqsnCI_>z&O3rcmqBHmeAkHKhygStbtqj5ps|E0I3AbjkduQIuAEdkHd5HZjzmV50QOEZ;(HPmp~(V5oAu+;D=T3vhODR+ZHYu_?TQG^+NtPaO2$vkSkYP{r)%5S&N9UD?Zax0)A&@_ zlUF+1pc}@zraPj#a52;pVKw7f*G;Qf$~tmXGJ~@{juYhaYN%LHW@BGom2xS0a-irB z_Z70rQ@${xyB2sVD^ru_FAny0n?ji8u7+_{@^i(4S)Xt^otQ9NcXjw(5*)>V+Vb01x4RR0F=NZ}=tUX_9-g%PdxD)Si67AIy zU9M=vZ;!=d9nHtxa?VRP%kEw`mdRv#RF5kYX_=9(D|>so;|VvB?vkfaDOajYs}+s% ME$rhJUE*K=2LS8bhX4Qo literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet b/core/src/test/resources/delta/table-with-dv-large/part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..267e8de72c4a230406bc12b77602b2ece1a49c0f GIT binary patch literal 1007 zcmZXTOK1~O6o&7l?KEmpL}#2}z%XEGp&1j?rcDr~P<((&k%G2bDajE0$&JH0&L(?{BLi`B{ae{n1JRsjeK7`%~``{q0pTWXc z)bFAnq0bPtUEgEc7|A3YrFb6GVdy>j6<&t;jJODc=nK@p!)q9UfZm&Mh*}W>n1WS! z_o<&{ZUylk@iG1+x}Uh8`b**!a+}(%ZXw2XA-;49@rzm(eHSlBeAp$#8(58=#?zn! z#$XZe228?Ja+CQx=UOs-97%Stj!$r zbH`O#8X?ThA*<_@^y*5O4J{IlM0>+gT8F|ykF+XAh?4nMH$UpV;cd&yFYjfFzNO8U zGj`)kVNG7+TDM`E-*m$f&B2D&8)kYv2RdR!sK_}e6qToZVMW^pJXMsrxjp+QGHHwN%W7*lrD}euTCuuP&Y+VJR)1TEf6{Y! 
zi<5N~IJN3&r>p{*mw~j7%-g}-tg^GVAJpo(z#gy*CFSLPJ9~KEuE=t;wDe+BTAigz zpo*%7QK@^LbUk_)Rs;6`W$oZIb$Bf31%j2}-V5?f<^ZoTB?V=|p_6NN%ySPi=}m6rwSx-v7IPNv*cen^g>a;{W` Q{zx>+x6s2+G|pfBAE6Q1+yDRo literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet b/core/src/test/resources/delta/table-with-dv-large/part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..65feba93d8b1857c9671e8cccddd1a58c4d4aa96 GIT binary patch literal 1008 zcmZWoO-K|`9DlRwIDsC@-tY#dS|;39&~ewzPZZnmkPsoEAZEy$-8bvv?1wY6%3=~! z*tL^g5~PD2%Az3X!&9d{k<9|1Lxj8qj{qF{ zH}DR9=isM+ufRI`-yZYt;xYYMhIhd4NJoHvV}ZrN|;y87-1cC9p0j2;w~0T!Z+Q* zvY8V;PjjEM*%iYt7lkooc)nXo`9`N$%QtQm-eUT9^f_%ww zxaFV+i$$mLe_LZ{Wp(xLDq)xECaFHm827~_?jd-dnc84oIX7H;l5pIN_n3*UU_=X7 zwBU8cVzHiZ)GB42beQ)IS~1(U`$fOS6KQU9%i@Fmeer~qNcZvqsFcNp-J$6^-h~$Y JMHBee{{cZ_+JOK7 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet b/core/src/test/resources/delta/table-with-dv-large/part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..61ef5982f650df0459cee5ffc6d06e3717dcc2cc GIT binary patch literal 1008 zcmZXTPe>F|9LL|T?zn-M$Qfo~Bpu2Wb=-AR(>x?t8I*~5GDF_%zTFmQcXemh5V0tz zPSq`g3L+z;z)nS%2%>|R2%-q0Q*{YEbnwuz�X|eEI(Q{oZ@O_jYz{XgsA0eNEF} zYx?K!&mKoCeLX^=>2V>JYc}shYJ?E$k;n?Xio6{aA{i0lBWTCh;SFko=uPx{^mTX; zcFCII0sITlj{X40Ko@8PzuD(0eV5U5=vU}3^qs(0;RUi^@C`6X_7J{B_5?mj?+9GP zQxc;f71rSA1bJ$I@DIQlvZLUyCPYI_h)K{|%bxgNcopy032_YX;xFMp!JGI4Z zeFdyhv+(c0Fg->5O>_*r#E;U`48D@z$Iros;6}1*^xQ))qXl>a?vka+uF`iL>;d0E znc7QTaj zQ8P9#y{Zz%dL&^btd_V%YfoHgiJgibrDXijj18?NKDfR8_E`pR79O_kIE_z*J^2J4 z)#-+@sp*ag7p_NI;;e39peZv^uH+p#E1ALB8pjC=MKxJ2DRbXYQB?{V*>$??PY;#y z%2U2Dtz8Q|m6w@?Q==38y(XudyBg+I#V?dgW-RR-c9Ozu+tuOEc@A$eS5<*iDPMGo 
zDv((jNONk@4i;vWJz)DmrJ4!s4m(#+Ue>n<&Mn#{Sqw{CFGgjjvrr0DUR4N6RnL>I zM-Pithy8zBdth;C>HLx~_wSly`!d5aP!qgEc%R9g!8(hTrk$HKrJPi^lkN;hba|o~ zzcZOkb~m-U)qF|9LL|TI_^P&s99!WAQ?!MblP=OQ@U6YCJ|E7VN>MI?ps})-PN60OBPRF z!XWZew}_w!3JD4-JOl}`LUk)b`X5MyL4uc{L!VVUdHC}E^ZUJ--+S|Bp!aNC7y5#x zKh^Y)_n*J6we(7eMAIWe+^E^O9;y*SR6?P5At7FYbNI{PN?3>z{x68q@*XI#j^76! z($f#Spp$$BABQioZVtUnJ`BF1AH&zk|4>ikE%YO@9?*|oBAZ7i;m`0h`c~04dIkN2 zo*$qaogg=0omv*X2rq#l>dW}CdLhc15Pk5yIw7vs3gP1$&~M0Q(M{-Out;s5>@K`Y zUPgZ)uYeceJU&Oh3m$|Q$;$NJLR;tv{0aXZokYI{FVPF|3~T1VEI3Ja5ImswH?=!t zzwkftU*KB&aq@L=41E(ecZW@*wtDhVb#Ogw;!Gt;cV@D=L~dU$*6V8&`#A zas_VJ>(S_jraPj#uo7yDuzD{84H;&kIO@m=$qdfsI8KnwsbnFqj6JqcSyp>datwCm+`^b~H?>qMt3~jk<($*og_FWk-iU={cOmWJv{1 zv2fAJsX(S>AdR829ZXLsyT|r}Vks5at#&4>ytHrkTqxUlnX8sIPmIcTXEq?B6lT)@Dw~Kn-#Z)%_&52W!g}8@G4T7EoHrQqwMTF|9LIk%>Nto6ku%QJkjp@uf=;_`YFb2#9SWi>B+xbF&FL1|UgU-P_25%hf2EW4liTD?E9PtGBvX!U{n|LimF=!6)D!kud5&0JQ0(lB?XEV_{ zn1c5i@e2I!U>w|s{}%j&+6GjH9C$x4PZT|0pcLwR;6A*Eh?k-F=-Wcy1at!VU1$xl zfW9QOiTYpg0yv7=9@efQFCm`=N6{;R`Vmprnqd+U4x)&{Sx!$ zrv3B`&of1Yc~aC`7MNS+q*evp(2b~UpmjhdNv~HF5hZn7Qg@`Ne7?4Q?JSkL4-Ttk zI1Qf)_9RlDv?{8)D=8L*3#&p@#_A3XG^Oc9FK4k5!wlBJI9A}~`DC%cwWG;A_nb64 zeYNOMCkr|5a-TG#VS&qYEWLQ;+W7FGMx<#C4GY}!onk?2NmysB7->BX9say);VtIN zJg~gt9V^cRmSF+Yrk2cLvBb?0(+|9IIxu_9ti#=mZ;qsv%mT}YrGpnkrQYcj0-oa@ zj6&IUneCzni+Qj4e_L~8sZzOJA+4)nlGh($ literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-large/part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet b/core/src/test/resources/delta/table-with-dv-large/part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9ba4278e87571206e3659341efdea61616db6c53 GIT binary patch literal 1007 zcmZXT%}W$v9LJwsb=reLQ8UfbLd#^5;Ewy^dm;!Hy&)n&Wm!D4`>ZX_?&{2JA!<~F z6-1X_PnM^GKr*631$8O>1L__gI*149SeHJlcG|<2&+q$tdFD6I&JGWZq;#P#Y5IFj z|NCm;?#`IL5+bAN5h0$|ZG5T|Lac;BpW&B~5J%xY)J}Q~zJt5qDqMg=^z@@|_#0s% 
z(r^~uqgF;YtHfV|N8lpthriGX=IY>mco2SNwnV%{JV^W<_QO7Mv&@a)=ZTxhwc)#w zhyLInQhO2#ecbpV#5(aZeG~9agAkb=Ley*Q0iWR?5T7MhaG_p^^QZ|equ115!Xrqd zrw{HYw*=?uPop;SE$}3qrj|pC)PItD4o@LL?HoRVOzP|CKJhVXZ_#z?2L2K8G2%G& zTllB=JNOUy+vqX5WqfpZIBM26aWJSR^l;R`#Egi(rV&*Wt;ZYvN$E{o_On-1QR-om zhr(vVRq0lgFjhj*STxogiP72-5n6P+qK7FNtD3Q=Fsxdw|HoRrGjnrebHY4Ov&q)W49Y;A;U22*GrGN4qUgo9-z1*0Q)xSssIKU6 zMH7A^nM|hRhn-5n&BbMBkCV*j^IfXTk(r##OUIEtU7e|nlgV|+ZlbIsRlX$}i}5Wq K@)Hg7SN{hZMb;Jo literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-small/.deletion_vector_b6a98cdd-7843-470d-8897-708cdffa38c5.bin.crc b/core/src/test/resources/delta/table-with-dv-small/.deletion_vector_b6a98cdd-7843-470d-8897-708cdffa38c5.bin.crc new file mode 100644 index 0000000000000000000000000000000000000000..3f009cbc1befe458088c0677dd87fb4a4980f1aa GIT binary patch literal 12 TcmYc;N@ieSU}88U=kp!_5|IO7 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000000.crc.crc b/core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000000.crc.crc new file mode 100644 index 0000000000000000000000000000000000000000..03333cabf401ba55ec9cc3a775279bd339df3fa0 GIT binary patch literal 32 ocmYc;N@ieSU}E^QUP^mgVq8q+0rvKItHnDGz4d(WS28mU0J=C0PXGV_ literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000000.json.crc b/core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 0000000000000000000000000000000000000000..6192a08b32dda6245b166f5eb880a23c607dd30d GIT binary patch literal 24 gcmYc;N@ieSU}89Ie`d$M4#SlBvs(_gicGu;0BW`hJOBUy literal 0 HcmV?d00001 diff --git a/core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000001.crc.crc b/core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000001.crc.crc new file mode 100644 index 
0000000000000000000000000000000000000000..557188406957013bbd8fc50f9abf8647db954e0f GIT binary patch literal 32 ocmYc;N@ieSU}E5s","txnId":"d54c00f5-9500-4ed5-b1b5-9f463861f4d3"}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","columnMapping"],"writerFeatures":["deletionVectors","columnMapping"]}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\"}}]}","partitionColumns":[],"configuration":{"delta.columnMapping.mode":"name","delta.enableDeletionVectors":"true","delta.columnMapping.maxColumnId":"1"},"createdTime":1673461406485}} +{"add":{"path":"r4/part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet","partitionValues":{},"size":818,"modificationTime":1673461408778,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":0},\"maxValues\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":9},\"nullCount\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1673461408778000","MIN_INSERTION_TIME":"1673461408778000","MAX_INSERTION_TIME":"1673461408778000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/core/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000001.crc b/core/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000001.crc new file mode 100644 index 00000000000..127032042dc --- /dev/null +++ b/core/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000001.crc @@ -0,0 +1 @@ 
+{"txnId":"3943baa4-30a0-44a4-a4f4-e5e92d2ab08b","tableSizeBytes":818,"numFiles":1,"numDeletedRecordsOpt":2,"numDeletionVectorsOpt":1,"numMetadata":1,"numProtocol":1,"setTransactions":[],"metadata":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"delta.columnMapping.id\":1,\"delta.columnMapping.physicalName\":\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\"}}]}","partitionColumns":[],"configuration":{"delta.columnMapping.mode":"name","delta.enableDeletionVectors":"true","delta.columnMapping.maxColumnId":"1"},"createdTime":1673461406485},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","columnMapping"],"writerFeatures":["deletionVectors","columnMapping"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0],"totalBytes":[818,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[0,1,0,0,0,0,0,0,0,0]},"allFiles":[{"path":"r4/part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet","partitionValues":{},"size":818,"modificationTime":1673461408778,"dataChange":false,"stats":"{\"numRecords\":10,\"minValues\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":0},\"maxValues\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":9},\"nullCount\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1673461408778000","MIN_INSERTION_TIME":"1673461408778000","MAX_INSERTION_TIME":"1673461408778000","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"WYbkwCTB$gH)J7t?$/sK","offset":1,"sizeInBytes":36,"cardinality":2}}]} diff --git a/core/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000001.json b/core/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..392d28d6a33 --- /dev/null +++ b/core/src/test/resources/delta/table-with-dv-small/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1673461427387,"operation":"DELETE","operationParameters":{"predicate":"[\"(spark_catalog.delta.`/private/var/folders/g3/hcd28y8s71s0yh7whh443wz00000gp/T/spark-cb573b98-e75d-460f-9769-efd9e9bfeffc`.value IN (0, 
9))\"]"},"readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"0","numCopiedRows":"0","numDeletionVectorsAdded":"1","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"0","executionTimeMs":"11114","numDeletedRows":"2","scanTimeMs":"10589","numAddedFiles":"0","rewriteTimeMs":"508"},"engineInfo":"Databricks-Runtime/","txnId":"3943baa4-30a0-44a4-a4f4-e5e92d2ab08b"}} +{"remove":{"path":"r4/part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet","deletionTimestamp":1673461427383,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":818,"tags":{"INSERTION_TIME":"1673461408778000","MIN_INSERTION_TIME":"1673461408778000","MAX_INSERTION_TIME":"1673461408778000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"r4/part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet","partitionValues":{},"size":818,"modificationTime":1673461408778,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":0},\"maxValues\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":9},\"nullCount\":{\"col-4f064e48-f371-433a-b851-9e73c78fa9fc\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1673461408778000","MIN_INSERTION_TIME":"1673461408778000","MAX_INSERTION_TIME":"1673461408778000","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"WYbkwCTB$gH)J7t?$/sK","offset":1,"sizeInBytes":36,"cardinality":2}}} diff --git a/core/src/test/resources/delta/table-with-dv-small/deletion_vector_b6a98cdd-7843-470d-8897-708cdffa38c5.bin b/core/src/test/resources/delta/table-with-dv-small/deletion_vector_b6a98cdd-7843-470d-8897-708cdffa38c5.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1a01e661cdcca08ff5d67e7d2de53381980735a GIT binary patch literal 45 lcmZQ%U|>+Wc-bD@NG^{``{ANwLi|DjM_|E5q*8e+SvA-Rn+oiigZ z7aR(}G5GC7yiQUv&ojZE%u-P%A-s5-R~NHvA*84XGxrEmEFfIHIXRn*7$J=9C43TP 
zmE;*~d3?Yf!jASbs%yzn&g-S9d6|Fasi+}>8rW}TZPlwnSW~O2%Vk(weJf6cj4EsT zVQpoQHd}X99m%_5Nmh%6C^3*NrG!9Y8LTh**1w~z>Dr2ftbt?oWtzQ%qDZm@FD_RV ziXu<`^XSdy$u!K=APYX65yR&Rkq_e$DLlV wOk~u7(Ijx!nf+#GXMYp literal 0 HcmV?d00001 diff --git a/core/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala b/core/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala index c44c39c4f97..ba1a2571f71 100644 --- a/core/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/sql/delta/ActionSerializerSuite.scala @@ -226,6 +226,112 @@ class ActionSerializerSuite extends QueryTest with SharedSparkSession with Delta expectedJson = """{"remove":{"path":"part=p1/f1","deletionTimestamp":11,"dataChange":true,""" + """"extendedFileMetadata":true,"partitionValues":{"x":"2"},"size":10}}""".stripMargin) + private def deletionVectorWithRelativePath: DeletionVectorDescriptor = + DeletionVectorDescriptor.onDiskWithRelativePath( + id = UUID.randomUUID(), + randomPrefix = "a1", + sizeInBytes = 10, + cardinality = 2, + offset = Some(10)) + + private def deletionVectorWithAbsolutePath: DeletionVectorDescriptor = + DeletionVectorDescriptor.onDiskWithAbsolutePath( + path = "/test.dv", + sizeInBytes = 10, + cardinality = 2, + offset = Some(10)) + + private def deletionVectorInline: DeletionVectorDescriptor = + DeletionVectorDescriptor.inlineInLog(Array(1, 2, 3, 4), 1) + + roundTripCompare("Add with deletion vector - relative path", + AddFile( + path = "test", + partitionValues = Map.empty, + size = 1, + modificationTime = 1, + dataChange = true, + tags = Map.empty, + deletionVector = deletionVectorWithRelativePath)) + roundTripCompare("Add with deletion vector - absolute path", + AddFile( + path = "test", + partitionValues = Map.empty, + size = 1, + modificationTime = 1, + dataChange = true, + tags = Map.empty, + deletionVector = deletionVectorWithAbsolutePath)) + roundTripCompare("Add with deletion vector - inline", + 
AddFile( + path = "test", + partitionValues = Map.empty, + size = 1, + modificationTime = 1, + dataChange = true, + tags = Map.empty, + deletionVector = deletionVectorInline)) + + roundTripCompare("Remove with deletion vector - relative path", + RemoveFile( + path = "test", + deletionTimestamp = Some(1L), + extendedFileMetadata = Some(true), + partitionValues = Map.empty, + dataChange = true, + size = Some(1L), + tags = Map.empty, + deletionVector = deletionVectorWithRelativePath)) + roundTripCompare("Remove with deletion vector - absolute path", + RemoveFile( + path = "test", + deletionTimestamp = Some(1L), + extendedFileMetadata = Some(true), + partitionValues = Map.empty, + dataChange = true, + size = Some(1L), + tags = Map.empty, + deletionVector = deletionVectorWithAbsolutePath)) + roundTripCompare("Remove with deletion vector - inline", + RemoveFile( + path = "test", + deletionTimestamp = Some(1L), + extendedFileMetadata = Some(true), + partitionValues = Map.empty, + dataChange = true, + size = Some(1L), + tags = Map.empty, + deletionVector = deletionVectorInline)) + + // These make sure we don't accidentally serialise something we didn't mean to. 
+ testActionSerDe( + name = "AddFile (with deletion vector) - json serialization/deserialization", + action = AddFile( + path = "test", + partitionValues = Map.empty, + size = 1, + modificationTime = 1, + dataChange = true, + tags = Map.empty, + deletionVector = deletionVectorWithAbsolutePath), + expectedJson = + """ + |{"add":{ + |"path":"test", + |"partitionValues":{}, + |"size":1, + |"modificationTime":1, + |"dataChange":true, + |"tags":{}, + |"deletionVector":{ + |"storageType":"p", + |"pathOrInlineDv":"/test.dv", + |"offset":10, + |"sizeInBytes":10, + |"cardinality":2}} + |}""".stripMargin.replaceAll("\n", "") + ) + testActionSerDe( "AddCDCFile (without tags) - json serialization/deserialization", diff --git a/core/src/test/scala/org/apache/spark/sql/delta/CheckpointsSuite.scala b/core/src/test/scala/org/apache/spark/sql/delta/CheckpointsSuite.scala index 23e095b4f5a..6bae748fd22 100644 --- a/core/src/test/scala/org/apache/spark/sql/delta/CheckpointsSuite.scala +++ b/core/src/test/scala/org/apache/spark/sql/delta/CheckpointsSuite.scala @@ -214,13 +214,14 @@ class CheckpointsSuite extends QueryTest test("checkpoint does not contain remove.tags and remove.numRecords") { withTempDir { tempDir => - var expectedRemoveFileSchema = Seq( + val expectedRemoveFileSchema = Seq( "path", "deletionTimestamp", "dataChange", "extendedFileMetadata", "partitionValues", - "size") + "size", + "deletionVector") val tablePath = tempDir.getAbsolutePath // Append rows [0, 9] to table and merge tablePath. 
spark.range(end = 10).write.format("delta").mode("overwrite").save(tablePath) diff --git a/core/src/test/scala/org/apache/spark/sql/delta/DeltaParquetFileFormatSuite.scala b/core/src/test/scala/org/apache/spark/sql/delta/DeltaParquetFileFormatSuite.scala index 61f389cbd31..044987f24c4 100644 --- a/core/src/test/scala/org/apache/spark/sql/delta/DeltaParquetFileFormatSuite.scala +++ b/core/src/test/scala/org/apache/spark/sql/delta/DeltaParquetFileFormatSuite.scala @@ -64,7 +64,7 @@ class DeltaParquetFileFormatSuite extends QueryTest val fs = addFilePath.getFileSystem(hadoopConf) val broadcastDvMap = spark.sparkContext.broadcast( - Map(fs.getFileStatus(addFilePath).getPath().toString() -> dv) + Map(fs.getFileStatus(addFilePath).getPath().toUri -> dv) ) val broadcastHadoopConf = spark.sparkContext.broadcast( @@ -73,7 +73,7 @@ class DeltaParquetFileFormatSuite extends QueryTest val deltaParquetFormat = new DeltaParquetFileFormat( metadata, isSplittable = false, - disablePushDowns = false, + disablePushDowns = true, Some(tablePath), Some(broadcastDvMap), Some(broadcastHadoopConf)) diff --git a/core/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala b/core/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala new file mode 100644 index 00000000000..bacc482f51a --- /dev/null +++ b/core/src/test/scala/org/apache/spark/sql/delta/deletionvectors/DeletionVectorsSuite.scala @@ -0,0 +1,197 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.delta.deletionvectors + +import java.io.File + +import org.apache.spark.sql.delta.{DeltaLog, DeltaTestUtilsForTempViews} +import org.apache.spark.sql.delta.DeltaTestUtils.BOOLEAN_DOMAIN +import org.apache.spark.sql.delta.actions.DeletionVectorDescriptor.EMPTY +import org.apache.spark.sql.delta.deletionvectors.DeletionVectorsSuite._ +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.test.DeltaSQLCommandTest +import org.apache.spark.sql.delta.util.JsonUtils +import com.fasterxml.jackson.databind.node.ObjectNode +import org.apache.commons.io.FileUtils + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, Subquery} +import org.apache.spark.sql.test.SharedSparkSession + +class DeletionVectorsSuite extends QueryTest + with SharedSparkSession + with DeltaSQLCommandTest + with DeltaTestUtilsForTempViews { + import testImplicits._ + + test(s"read Delta table with deletion vectors") { + def verifyVersion(version: Int, expectedData: Seq[Int]): Unit = { + checkAnswer( + spark.read.format("delta").option("versionAsOf", version.toString).load(table1Path), + expectedData.toDF()) + } + // Verify all versions of the table + verifyVersion(0, expectedTable1DataV0) + verifyVersion(1, expectedTable1DataV1) + verifyVersion(2, expectedTable1DataV2) + verifyVersion(3, expectedTable1DataV3) + verifyVersion(4, expectedTable1DataV4) + } + + test("throw error when non-pinned TahoeFileIndex snapshot is used") { + // Corner case where we still have non-pinned TahoeFileIndex when data skipping is disabled + withSQLConf(DeltaSQLConf.DELTA_STATS_SKIPPING.key -> "false") { + def assertError(dataFrame: DataFrame): Unit = { + val ex = intercept[IllegalArgumentException] { + dataFrame.collect() + } + assert(ex.getMessage contains + "Cannot work with a 
non-pinned table snapshot of the TahoeFileIndex") + } + assertError(spark.read.format("delta").load(table1Path)) + // assertError(spark.read.format("delta").option("versionAsOf", "2").load(table1Path)) + } + } + + test("read Delta table with deletion vectors with a filter") { + checkAnswer( + spark.read.format("delta").load(table1Path).where("value in (300, 787, 239)"), + // 300 is removed in the final table + Seq(787, 239).toDF()) + } + + test("read Delta table with DV for a select files") { + val deltaLog = DeltaLog.forTable(spark, table1Path) + val snapshot = deltaLog.unsafeVolatileSnapshot + + // Select a subset of files with DVs and specific value range, this is just to test + // that reading these files will respect the DVs + var rowCount = 0L + var deletedRowCount = 0L + val selectFiles = snapshot.allFiles.collect().filter( + addFile => { + val stats = JsonUtils.mapper.readTree(addFile.stats).asInstanceOf[ObjectNode] + // rowCount += stats.get("rowCount") + val min = stats.get("minValues").get("value").toString + val max = stats.get("maxValues").get("value").toString + val selected = (min == "18" && max == "1988") || + (min == "33" && max == "1995") || (min == "13" && max == "1897") + // TODO: these steps will be easier and also change (depending upon tightBounds value) once + // we expose more methods on AddFile as part of the data skipping changes with DVs + if (selected) { + rowCount += stats.get("numRecords").asInt(0) + deletedRowCount += Option(addFile.deletionVector).getOrElse(EMPTY).cardinality + } + selected + } + ).toSeq + assert(selectFiles.filter(_.deletionVector != null).size > 1) // make at least one file has DV + + assert(deltaLog.createDataFrame(snapshot, selectFiles).count() == rowCount - deletedRowCount) + } + + for (optimizeMetadataQuery <- BOOLEAN_DOMAIN) + test("read Delta tables with DVs in subqueries: " + + s"metadataQueryOptimizationEnabled=$optimizeMetadataQuery") { + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key 
-> + optimizeMetadataQuery.toString) { + val table1 = s"delta.`${new File(table1Path).getAbsolutePath}`" + val table2 = s"delta.`${new File(table2Path).getAbsolutePath}`" + + def assertQueryResult(query: String, expected1: Int, expected2: Int): Unit = { + val df = spark.sql(query) + assertPlanContains(df, Subquery.getClass.getSimpleName.stripSuffix("$")) + val actual = df.collect()(0) // fetch only row in the result + assert(actual === Row(expected1, expected2)) + } + + // same table used twice in the query + val query1 = s"SELECT (SELECT COUNT(*) FROM $table1), (SELECT COUNT(*) FROM $table1)" + assertQueryResult(query1, expectedTable1DataV4.size, expectedTable1DataV4.size) + + // two tables used in the query + val query2 = s"SELECT (SELECT COUNT(*) FROM $table1), (SELECT COUNT(*) FROM $table2)" + assertQueryResult(query2, expectedTable1DataV4.size, expectedTable2DataV1.size) + } + } + + test("insert into Delta table with DVs") { + withTempDir { tempDir => + val source1 = new File(table1Path) + val source2 = new File(table2Path) + val target = new File(tempDir, "insertTest") + + // Copy the source2 DV table to a temporary directory + FileUtils.copyDirectory(source1, target) + + // Insert data from source2 into source1 (copied to target) + // This blind append generates a plan with `V2WriteCommand` which is a corner + // case in `PrepareDeltaScan` rule + val insertDf = spark.sql(s"INSERT INTO TABLE delta.`${target.getAbsolutePath}` " + + s"SELECT * FROM delta.`${source2.getAbsolutePath}`") + // [[AppendData]] is one of the [[V2WriteCommand]] subtypes + assertPlanContains(insertDf, AppendData.getClass.getSimpleName.stripSuffix("$")) + + val dataInTarget = spark.sql(s"SELECT * FROM delta.`${target.getAbsolutePath}`") + + // Make sure the number of rows is correct. 
+ for (metadataQueryOptimization <- BOOLEAN_DOMAIN) { + withSQLConf(DeltaSQLConf.DELTA_OPTIMIZE_METADATA_QUERY_ENABLED.key -> + metadataQueryOptimization.toString) { + assert(dataInTarget.count() == expectedTable2DataV1.size + expectedTable1DataV4.size) + } + } + + // Make sure the contents are the same + checkAnswer( + dataInTarget, + spark.sql( + s"SELECT * FROM delta.`${source1.getAbsolutePath}` UNION ALL " + + s"SELECT * FROM delta.`${source2.getAbsolutePath}`") + ) + } + } + + private def assertPlanContains(queryDf: DataFrame, expected: String): Unit = { + val optimizedPlan = queryDf.queryExecution.analyzed.toString() + assert(optimizedPlan.contains(expected), s"Plan is missing `$expected`: $optimizedPlan") + } +} + +object DeletionVectorsSuite { + val table1Path = "src/test/resources/delta/table-with-dv-large" + // Table at version 0: contains [0, 2000) + val expectedTable1DataV0 = Seq.range(0, 2000) + // Table at version 1: removes rows with id = 0, 180, 300, 700, 1800 + val v1Removed = Set(0, 180, 300, 700, 1800) + val expectedTable1DataV1 = expectedTable1DataV0.filterNot(e => v1Removed.contains(e)) + // Table at version 2: inserts rows with id = 300, 700 + val v2Added = Set(300, 700) + val expectedTable1DataV2 = expectedTable1DataV1 ++ v2Added + // Table at version 3: removes rows with id = 300, 250, 350, 900, 1353, 1567, 1800 + val v3Removed = Set(300, 250, 350, 900, 1353, 1567, 1800) + val expectedTable1DataV3 = expectedTable1DataV2.filterNot(e => v3Removed.contains(e)) + // Table at version 4: inserts rows with id = 900, 1567 + val v4Added = Set(900, 1567) + val expectedTable1DataV4 = expectedTable1DataV3 ++ v4Added + + val table2Path = "src/test/resources/delta/table-with-dv-small" + // Table at version 0: contains 0 - 9 + val expectedTable2DataV0 = Seq(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) + // Table at version 1: removes rows 0 and 9 + val expectedTable2DataV1 = Seq(1, 2, 3, 4, 5, 6, 7, 8) +} From dbd42dc9a1438d7028bf9888240681a207343d70 Mon Sep 17 00:00:00 
2001 From: Venki Korukanti Date: Wed, 25 Jan 2023 13:52:38 -0800 Subject: [PATCH 2/3] review --- .../sql/delta/PreprocessTableWithDVs.scala | 2 +- .../sql/delta/SubqueryTransformerHelper.scala | 19 +++++++++++-------- .../sql/delta/stats/PrepareDeltaScan.scala | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala b/core/src/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala index 2b3996b707e..3126985c32c 100644 --- a/core/src/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/PreprocessTableWithDVs.scala @@ -64,7 +64,7 @@ import org.apache.spark.util.SerializableConfiguration */ trait PreprocessTableWithDVs extends SubqueryTransformerHelper { def preprocessTablesWithDVs(plan: LogicalPlan): LogicalPlan = { - transformWithSubqueries(plan) { + transformSubqueryExpressions(plan) { case ScanWithDeletionVectors(dvScan) => dvScan } } diff --git a/core/src/main/scala/org/apache/spark/sql/delta/SubqueryTransformerHelper.scala b/core/src/main/scala/org/apache/spark/sql/delta/SubqueryTransformerHelper.scala index ef18ea59d5f..97224863bcb 100644 --- a/core/src/main/scala/org/apache/spark/sql/delta/SubqueryTransformerHelper.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/SubqueryTransformerHelper.scala @@ -20,21 +20,24 @@ import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery, SupportsSubquery} /** - * Trait to allow processing '''all''' instances of a certain node in a subquery. - * - * Pattern matching in transform cannot be used because of the short-circuiting - * nature of the pattern matching. It stops matching after one instance of - * the certain node is found and remaining nodes in the subquery plan will - * not be transformed. 
+ * Trait to allow processing a special transformation of [[SubqueryExpression]] + * instances in a query plan. */ trait SubqueryTransformerHelper { /** * Transform all nodes matched by the rule in the query plan rooted at given `plan`. + * It traverses the tree starting from the leaves, whenever a [[SubqueryExpression]] + * expression is encountered, given [[rule]] is applied to the subquery plan `plan` + * in [[SubqueryExpression]] starting from the `plan` root until leaves. + * + * This is slightly different behavior compared to [[QueryPlan.transformUpWithSubqueries]] + * or [[QueryPlan.transformDownWithSubqueries]] + * * It requires that the given plan already gone through [[OptimizeSubqueries]] and the * root node denoting a subquery is removed and optimized appropriately. */ - def transformWithSubqueries(plan: LogicalPlan) + def transformSubqueryExpressions(plan: LogicalPlan) (rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = { require(!isSubqueryRoot(plan)) transformSubqueries(plan, rule) transform (rule) @@ -52,7 +55,7 @@ trait SubqueryTransformerHelper { plan transformAllExpressionsUp { case subquery: SubqueryExpression => - subquery.withNewPlan(transformWithSubqueries(subquery.plan)(rule)) + subquery.withNewPlan(transformSubqueryExpressions(subquery.plan)(rule)) } } } diff --git a/core/src/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala b/core/src/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala index 30fda5c8847..bad64a24cb7 100644 --- a/core/src/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala +++ b/core/src/main/scala/org/apache/spark/sql/delta/stats/PrepareDeltaScan.scala @@ -140,7 +140,7 @@ trait PrepareDeltaScanBase extends Rule[LogicalPlan] // delta scans. 
val deltaScans = new mutable.HashMap[LogicalPlan, DeltaScan]() - transformWithSubqueries(plan) { + transformSubqueryExpressions(plan) { case scan @ DeltaTableScan(planWithRemovedProjections, filters, fileIndex, limit, delta) => val scanGenerator = getDeltaScanGenerator(fileIndex) From a250ed02519c5c6ad1931f415b43668ebfded070 Mon Sep 17 00:00:00 2001 From: Venki Korukanti Date: Wed, 25 Jan 2023 21:09:14 -0800 Subject: [PATCH 3/3] remove unnecessary crc files from golden tables --- ...r_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin.crc | Bin 12 -> 0 bytes ...r_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin.crc | Bin 12 -> 0 bytes ...41cc-be5d-eada75aceb4f-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4782-a5d5-621cd92cab11-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4ba5-85dd-e63ddc5931bf-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4770-8e26-18a77cf875f0-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4b79-83a6-e7f57db28650-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4896-ba21-029309ab8736-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...43d8-a756-4d0d63ef9fcb-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4f3e-8f64-bf0bf072de5f-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4feb-8816-19eccf7939f5-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...419e-8cba-eba4eeb3b11a-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4f3c-8e51-ecb551c89430-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4f15-b98d-b942db5b7359-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...43b8-b283-9e2308ef5487-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4e9f-a231-1a2d08a83a0e-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...41f5-bb03-afba1e70ea29-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4553-a5db-d1b8d682153b-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4e12-b709-e628f9bf2553-c000.snappy.parquet.crc | Bin 16 -> 0 bytes 
...42cb-bb51-fc5b4dd0be10-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...4e4d-a258-738e585a78a5-c000.snappy.parquet.crc | Bin 16 -> 0 bytes ...47ec-8451-13edcd855b15-c000.snappy.parquet.crc | Bin 16 -> 0 bytes .../_delta_log/.00000000000000000000.crc.crc | Bin 100 -> 0 bytes .../_delta_log/.00000000000000000000.json.crc | Bin 88 -> 0 bytes .../_delta_log/.00000000000000000001.crc.crc | Bin 104 -> 0 bytes .../_delta_log/.00000000000000000001.json.crc | Bin 52 -> 0 bytes .../_delta_log/.00000000000000000002.crc.crc | Bin 108 -> 0 bytes .../_delta_log/.00000000000000000002.json.crc | Bin 16 -> 0 bytes .../_delta_log/.00000000000000000003.crc.crc | Bin 112 -> 0 bytes .../_delta_log/.00000000000000000003.json.crc | Bin 64 -> 0 bytes .../_delta_log/.00000000000000000004.crc.crc | Bin 112 -> 0 bytes .../_delta_log/.00000000000000000004.json.crc | Bin 16 -> 0 bytes .../_delta_log/00000000000000000000.crc | 1 - .../_delta_log/00000000000000000001.crc | 1 - .../_delta_log/00000000000000000002.crc | 1 - .../_delta_log/00000000000000000003.crc | 1 - .../_delta_log/00000000000000000004.crc | 1 - ...r_b6a98cdd-7843-470d-8897-708cdffa38c5.bin.crc | Bin 12 -> 0 bytes .../_delta_log/.00000000000000000000.crc.crc | Bin 32 -> 0 bytes .../_delta_log/.00000000000000000000.json.crc | Bin 24 -> 0 bytes .../_delta_log/.00000000000000000001.crc.crc | Bin 32 -> 0 bytes .../_delta_log/.00000000000000000001.json.crc | Bin 24 -> 0 bytes ...4437-8b2d-ce6a1a94a34a-c000.snappy.parquet.crc | Bin 16 -> 0 bytes 45 files changed, 5 deletions(-) delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.deletion_vector_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet.crc delete mode 100644 
core/src/test/resources/delta/table-with-dv-large/.part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00000-f5c18e7b-d1bf-4ba5-85dd-e63ddc5931bf-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00001-5dbf0ba2-220a-4770-8e26-18a77cf875f0-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00002-5459a52f-3fd3-4b79-83a6-e7f57db28650-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00003-0e842060-9e04-4896-ba21-029309ab8736-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet.crc delete mode 100644 
core/src/test/resources/delta/table-with-dv-large/.part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/.part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000000.crc.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000000.json.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000001.crc.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000001.json.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000002.crc.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000002.json.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000003.crc.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000003.json.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000004.crc.crc delete mode 100644 
core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000004.json.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000000.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000002.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000003.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000004.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-small/.deletion_vector_b6a98cdd-7843-470d-8897-708cdffa38c5.bin.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000000.crc.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000000.json.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000001.crc.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-small/_delta_log/.00000000000000000001.json.crc delete mode 100644 core/src/test/resources/delta/table-with-dv-small/r4/.part-00000-5521fc5e-6e49-4437-8b2d-ce6a1a94a34a-c000.snappy.parquet.crc diff --git a/core/src/test/resources/delta/table-with-dv-large/.deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin.crc b/core/src/test/resources/delta/table-with-dv-large/.deletion_vector_44ccbf3f-b223-4581-9cd8-a7e569120ada.bin.crc deleted file mode 100644 index 3f9d864cd60ea5d77ec01061376d6754f007314f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Cu6apgDw6$1o4 diff --git a/core/src/test/resources/delta/table-with-dv-large/.deletion_vector_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin.crc 
b/core/src/test/resources/delta/table-with-dv-large/.deletion_vector_afcbf9f8-7558-4a5a-b1e2-7432c30bf452.bin.crc deleted file mode 100644 index 94dcca7d8bfbf1b8e820305cd4a38b82e6a27e9c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}9)~aHRtP6kG%P diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00000-51219d56-88a7-41cc-be5d-eada75aceb4f-c000.snappy.parquet.crc deleted file mode 100644 index 400a49907b2d88ddd9b27a008eef05368a71e30b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}Bj5bxI9$hv-`XCUym6 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00000-7c52eadd-8da7-4782-a5d5-621cd92cab11-c000.snappy.parquet.crc deleted file mode 100644 index 9b96bf257cec251894d424489f5b699327e17539..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}CtNVwcC diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00004-a72dbdec-2d0e-43d8-a756-4d0d63ef9fcb-c000.snappy.parquet.crc deleted file mode 100644 index 93d5490dc8b99e758770c122ddd480408f815072..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}AW@-D#~z;&N{QCVmAg diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00005-0972979f-852d-4f3e-8f64-bf0bf072de5f-c000.snappy.parquet.crc deleted file mode 100644 index 
3154da5403f4dc80d2287b0d8d393493b7c0969e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}CsXze3@9z6KKjCGZ5o diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00006-227c6a1e-0180-4feb-8816-19eccf7939f5-c000.snappy.parquet.crc deleted file mode 100644 index f8791d82db7fbbcc9b24c70d3fbbbfe3a3dd658c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}A7jxsdwT`DYpcB~Asp diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00007-7c37e5e3-abb2-419e-8cba-eba4eeb3b11a-c000.snappy.parquet.crc deleted file mode 100644 index 1d9af4a5bf8a6358d5652770b652dccf9a48dda4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}CVkl4QT*%dV#YC8P#5 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00008-1a0b4375-bbcc-4f3c-8e51-ecb551c89430-c000.snappy.parquet.crc deleted file mode 100644 index 583a2ab3e1e4098ea39846e9c2005d95a1807715..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}DIVe9>B5;nfZRBI5+> diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00009-52689115-1770-4f15-b98d-b942db5b7359-c000.snappy.parquet.crc deleted file mode 100644 index f39bfec8dbf30a05c1a118a2d1c26218171c8f18..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}7i}i(9&5GP@xFAo~PO diff 
--git a/core/src/test/resources/delta/table-with-dv-large/.part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00010-7f35fa1b-7993-4aff-8f60-2b76f1eb3f2c-c000.snappy.parquet.crc deleted file mode 100644 index 74f50ae4c9fb009a3ed80012927d1726bb8f751c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}Bgw|Dd>t&J|+-BK-u5 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00011-fce7841f-be9a-43b8-b283-9e2308ef5487-c000.snappy.parquet.crc deleted file mode 100644 index cf4304bacaa68a8ca93a59519eaa9055ea00fbbd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}BIv9N~8`taU#CA{GT5 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00012-9b83c213-31ff-4b2c-a5d9-be1a2bc2431d-c000.snappy.parquet.crc deleted file mode 100644 index 2a3bbd225ca212f0a83d522f69c784c9e69297b4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}6ZE6?1M!^~6K~B_suO diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00013-c6b05dd2-0143-4e9f-a231-1a2d08a83a0e-c000.snappy.parquet.crc deleted file mode 100644 index 155e26861bd621ba80d95fa10031a7cc26cbcbf4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}E^bzG~famc0o8DX|67 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet.crc 
b/core/src/test/resources/delta/table-with-dv-large/.part-00014-41a4f51e-62cd-41f5-bb03-afba1e70ea29-c000.snappy.parquet.crc deleted file mode 100644 index 04f2d213b20b04be5d0b80ae9762a4f4c74ca68c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}Et0bUOFCaZfD(BY6dc diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00015-f2f141bb-fa8f-4553-a5db-d1b8d682153b-c000.snappy.parquet.crc deleted file mode 100644 index 2d577971e110f626089fc613a32d5fdd2806dfcf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}E@K{`3E(p8~M}F17~K diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00016-d8f58ffc-8bff-4e12-b709-e628f9bf2553-c000.snappy.parquet.crc deleted file mode 100644 index 897bcadfb0d5cc6787469338e361e7ca8352174e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}E^563gN3bJ`F9BMk(O diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00017-45bac3c9-7eb8-42cb-bb51-fc5b4dd0be10-c000.snappy.parquet.crc deleted file mode 100644 index 6822a31b3d304f6904b564ed161c328ba696620d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}7k8blh#d((VEPAtMCb diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00018-9d74a51b-b800-4e4d-a258-738e585a78a5-c000.snappy.parquet.crc deleted file mode 100644 index 
d034d5de558962f71f26664dc31ca5b8e8226d66..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}E4@pU16xXSEOj9AE?5 diff --git a/core/src/test/resources/delta/table-with-dv-large/.part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet.crc b/core/src/test/resources/delta/table-with-dv-large/.part-00019-a9bb3ce8-afba-47ec-8451-13edcd855b15-c000.snappy.parquet.crc deleted file mode 100644 index 39969d9e6de8a2f3e56b46f5b97938e7c4b2ba1a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16 XcmYc;N@ieSU}DhKpJGvW`>qxMAQ}Y2 diff --git a/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000000.crc.crc b/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000000.crc.crc deleted file mode 100644 index 4ba40ce79ae2470e47b095bfc804e4d98f2e9c9b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 100 zcmV-q0Gt0~a$^7h00IDYL*12uLIo(IXiNrgNyR>@r^CBT^VJ54ryPEnJOkbKO3E!v zKL1Q0E2}^JhlLy>uJ4!a0n^w>KjtWO$g3s8%L!3&dtKYnvATuu@hIqJXf?8V34wyL&I z@usjOX_4_;~~-|`9DgjPAgOG$&2ZJX^un~0$DuXHR9N;cR)YLw!<&?z+3Ep+D)8@Mgl Sits4vM=}i#Wkzqr$b#dfJ~I6P diff --git a/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000003.json.crc b/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000003.json.crc deleted file mode 100644 index 3b50092592977039cfd6008982e9d46f6b9b7a5a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 64 zcmV-G0KflZa$^7h00IDZk3Z;$D))2(>wGcs++fVXaR*8T%Kn%raaQJx+|2#t4^I1R WR^JeiU8lyiQ5-=LEa$FD<2^M diff --git a/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000004.crc.crc b/core/src/test/resources/delta/table-with-dv-large/_delta_log/.00000000000000000004.crc.crc deleted file mode 100644 index 4902114e72b434b36c386190a20992276c5373b3..0000000000000000000000000000000000000000 GIT binary patch 
literal 0 HcmV?d00001 literal 112 zcmV-$0FVD;a$^7h00IEA!EoxVCdcelw&r;ye|?@$jvB?d5m!mUoheujCIwVv)g=w= zm&hC~k>;~~-|`9DgjPAgOG$&2ZJX^un~0z-fk@@Y1L9(hJ;L&r