From fba548ef7028237dcda9815a572da2d6fb92d3c5 Mon Sep 17 00:00:00 2001 From: Dylan Chen Date: Wed, 30 Oct 2024 16:13:27 +0800 Subject: [PATCH 1/5] bump iceberg-rust --- Cargo.lock | 313 +++++++++++------- Cargo.toml | 10 +- .../iceberg/storage_catalog.rs | 4 +- .../src/source/iceberg/parquet_file_reader.rs | 20 +- 4 files changed, 213 insertions(+), 134 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index db421dd9ac45..12fc55f45ad1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,10 +405,10 @@ version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03675e42d1560790f3524800e41403b40d0da1c793fe9528929fde06d8c7649a" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-data 53.2.0", + "arrow-schema 53.2.0", "chrono", "half 2.3.1", "num", @@ -449,14 +449,14 @@ dependencies = [ [[package]] name = "arrow-array" -version = "53.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd2bf348cf9f02a5975c5962c7fa6dee107a2009a7b41ac5fb1a027e12dc033f" +checksum = "d39387ca628be747394890a6e47f138ceac1aa912eab64f02519fed24b637af8" dependencies = [ "ahash 0.8.11", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-buffer 53.2.0", + "arrow-data 53.2.0", + "arrow-schema 53.2.0", "chrono", "half 2.3.1", "hashbrown 0.14.5", @@ -487,9 +487,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "53.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3092e37715f168976012ce52273c3989b5793b0db5f06cbaa246be25e5f0924d" +checksum = "9e51e05228852ffe3eb391ce7178a0f97d2cf80cc6ef91d3c4a6b3cb688049ec" dependencies = [ "bytes", "half 2.3.1", @@ -510,7 +510,7 @@ dependencies = [ "base64 0.21.7", "chrono", "half 2.3.1", - "lexical-core", + "lexical-core 0.8.5", "num", ] @@ -530,27 +530,27 @@ dependencies = [ "chrono", "comfy-table", "half 2.3.1", - "lexical-core", + "lexical-core 0.8.5", "num", "ryu", ] [[package]] name = "arrow-cast" -version = "53.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ce1018bb710d502f9db06af026ed3561552e493e989a79d0d0f5d9cf267a785" +checksum = "d09aea56ec9fa267f3f3f6cdab67d8a9974cbba90b3aa38c8fe9d0bb071bd8c1" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-data 53.2.0", + "arrow-schema 53.2.0", + "arrow-select 53.2.0", "atoi", "base64 0.22.0", "chrono", "half 2.3.1", - "lexical-core", + "lexical-core 1.0.2", "num", "ryu", ] @@ -570,7 +570,7 @@ dependencies = [ "csv", "csv-core", "lazy_static", - "lexical-core", + "lexical-core 0.8.5", "regex", ] @@ -600,12 +600,12 @@ dependencies = [ [[package]] name = "arrow-data" -version = "53.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4ac0c4ee79150afe067dc4857154b3ee9c1cd52b5f40d59a77306d0ed18d65" +checksum = "b98ae0af50890b494cebd7d6b04b35e896205c1d1df7b29a6272c5d0d0249ef5" dependencies = [ - "arrow-buffer 53.0.0", - "arrow-schema 53.0.0", + "arrow-buffer 53.2.0", + "arrow-schema 53.2.0", "half 2.3.1", "num", ] @@ -616,11 +616,11 @@ version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b915fb36d935b969894d7909ad417c67ddeadebbbd57c3c168edf64721a37d31" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", - "arrow-ipc 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-cast 53.2.0", + "arrow-ipc 53.2.0", + "arrow-schema 53.2.0", "base64 0.22.0", "bytes", "futures", @@ -662,15 +662,15 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "53.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb307482348a1267f91b0912e962cd53440e5de0f7fb24c5f7b10da70b38c94a" +checksum = "0ed91bdeaff5a1c00d28d8f73466bcb64d32bbd7093b5a30156b4b9f4dba3eee" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-cast 53.2.0", + "arrow-data 53.2.0", + "arrow-schema 53.2.0", "flatbuffers 24.3.25", ] @@ -688,7 +688,7 @@ dependencies = [ "chrono", "half 2.3.1", "indexmap 2.6.0", - "lexical-core", + "lexical-core 0.8.5", "num", "serde", "serde_json", @@ -730,11 +730,11 @@ version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "644046c479d80ae8ed02a7f1e1399072ea344ca6a7b0e293ab2d5d9ed924aa3b" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-data 53.2.0", + "arrow-schema 53.2.0", + "arrow-select 53.2.0", "half 2.3.1", "num", ] @@ -775,10 +775,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a29791f8eb13b340ce35525b723f5f0df17ecb955599e11f65c2a94ab34e2efb" dependencies = [ "ahash 0.8.11", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-data 53.2.0", + "arrow-schema 53.2.0", "half 2.3.1", ] @@ -799,9 +799,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "53.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85320a3a2facf2b2822b57aa9d6d9d55edb8aee0b6b5d3b8df158e503d10858" +checksum = "539ada65246b949bd99ffa0881a9a15a4a529448af1a07a9838dd78617dafab1" [[package]] name = "arrow-select" @@ -833,15 +833,15 @@ dependencies = [ [[package]] name = "arrow-select" -version = "53.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cc7e6b582e23855fd1625ce46e51647aa440c20ea2e71b1d748e0839dd73cba" +checksum = "6259e566b752da6dceab91766ed8b2e67bf6270eb9ad8a6e07a33c1bede2b125" dependencies = [ "ahash 0.8.11", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-data 53.2.0", + "arrow-schema 53.2.0", "num", ] @@ -884,11 +884,11 @@ version = "53.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0775b6567c66e56ded19b87a954b6b1beffbdd784ef95a3a2b03f59570c1d230" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-data 53.2.0", + "arrow-schema 53.2.0", + "arrow-select 53.2.0", "memchr", "num", "regex", @@ -901,10 +901,10 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c9f5ae23e3833d68bc536f3e744802e2d87db31dc4adbb0541f73bc69cc789a5" dependencies = [ - "arrow-array 53.0.0", + "arrow-array 53.2.0", "arrow-flight", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-schema 53.2.0", + "arrow-select 53.2.0", "futures-util", "thiserror", "tokio", @@ -919,9 +919,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6151bb7f26cde846e14adb17e08282153f7a9250dd78bbab3fa462b66d7b623" dependencies = [ "anyhow", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-schema 53.2.0", "atomic-time", "rquickjs", ] @@ -933,10 +933,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0b80da061a53aac237e711fddb01709002ba2e006f9fd4c72a430d4938dd921" dependencies = [ "anyhow", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-ipc 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-ipc 53.2.0", + "arrow-schema 53.2.0", "pyo3", "pyo3-build-config", ] @@ -948,9 +948,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe31144804e093dd60b4e7a749b64b9454040c05a34ccbeb641fc60fcf5ee92d" dependencies = [ "anyhow", - "arrow-array 53.0.0", - "arrow-ipc 53.0.0", - "arrow-schema 53.0.0", + "arrow-array 53.2.0", + "arrow-ipc 53.2.0", + "arrow-schema 53.2.0", "async-trait", "base64 0.22.0", "genawaiter", @@ -2240,6 +2240,17 @@ dependencies = [ "brotli-decompressor", ] +[[package]] +name = "brotli" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + [[package]] name = "brotli-decompressor" version = "4.0.1" @@ -6282,16 +6293,17 @@ dependencies = [ [[package]] name = "iceberg" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=f8b7bff7bd7853be7733fc1db1203862878f92c1#f8b7bff7bd7853be7733fc1db1203862878f92c1" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=13a561fdcc754b6f4f872f39bb71f484570107ff#13a561fdcc754b6f4f872f39bb71f484570107ff" dependencies = [ "anyhow", "apache-avro 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", "array-init", "arrow-arith 53.0.0", - "arrow-array 53.0.0", + "arrow-array 53.2.0", + "arrow-cast 53.2.0", "arrow-ord 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-schema 53.2.0", + "arrow-select 53.2.0", "arrow-string 53.0.0", "async-trait", "bimap", @@ -6304,10 +6316,11 @@ dependencies = [ "itertools 0.13.0", "moka", "murmur3", + "num-bigint", "once_cell", - "opendal 0.50.0", + "opendal 0.50.1", "ordered-float 4.1.1", - "parquet 53.0.0", + "parquet 53.2.0", "paste", "rand", "reqwest 0.12.4", @@ -6327,7 +6340,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-glue" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=f8b7bff7bd7853be7733fc1db1203862878f92c1#f8b7bff7bd7853be7733fc1db1203862878f92c1" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=13a561fdcc754b6f4f872f39bb71f484570107ff#13a561fdcc754b6f4f872f39bb71f484570107ff" dependencies = [ "anyhow", "async-trait", @@ -6344,7 +6357,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=f8b7bff7bd7853be7733fc1db1203862878f92c1#f8b7bff7bd7853be7733fc1db1203862878f92c1" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=13a561fdcc754b6f4f872f39bb71f484570107ff#13a561fdcc754b6f4f872f39bb71f484570107ff" dependencies = [ "async-trait", "chrono", @@ -6369,13 +6382,13 @@ dependencies = [ "anyhow", "apache-avro 0.17.0 (git+https://github.com/apache/avro.git)", "arrow-arith 53.0.0", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-cast 53.2.0", "arrow-ord 53.0.0", "arrow-row 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-schema 53.2.0", + "arrow-select 53.2.0", "async-trait", "bitvec", "bytes", @@ -6392,7 +6405,7 @@ dependencies = [ "once_cell", "opendal 0.49.2", "ordered-float 3.9.1", - "parquet 53.0.0", + "parquet 53.2.0", "prometheus", "regex", "reqwest 0.11.20", @@ -6894,11 +6907,24 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", + "lexical-parse-float 0.8.5", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "lexical-write-float 0.8.5", + "lexical-write-integer 0.8.5", +] + +[[package]] +name = "lexical-core" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +dependencies = [ + "lexical-parse-float 1.0.2", + "lexical-parse-integer 1.0.2", + "lexical-util 1.0.3", + "lexical-write-float 1.0.2", + "lexical-write-integer 1.0.2", ] [[package]] @@ -6907,8 +6933,19 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" dependencies = [ - "lexical-parse-integer", - "lexical-util", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +dependencies = [ + "lexical-parse-integer 1.0.2", + "lexical-util 1.0.3", "static_assertions", ] @@ -6918,7 +6955,17 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +dependencies = [ + "lexical-util 1.0.3", "static_assertions", ] @@ -6931,14 +6978,34 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "lexical-util" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +dependencies = [ + "static_assertions", +] + [[package]] name = "lexical-write-float" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" dependencies = [ - "lexical-util", - "lexical-write-integer", + "lexical-util 0.8.5", + "lexical-write-integer 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +dependencies = [ + "lexical-util 1.0.3", + "lexical-write-integer 1.0.2", "static_assertions", ] @@ -6948,7 +7015,17 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +dependencies = [ + "lexical-util 1.0.3", "static_assertions", ] @@ -8279,9 +8356,9 @@ dependencies = [ [[package]] name = "opendal" -version = "0.50.0" +version = "0.50.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36e44fc43be9ffe18dad3e3ef9d61c1ae01991ee6f1c8c026978c35777a711bf" +checksum = "213222b6c86949314d8f51acb26d8241e7c8dd0879b016a79471d49f21ee592f" dependencies = [ "anyhow", "async-trait", @@ -8715,7 +8792,7 @@ dependencies = [ "arrow-schema 52.2.0", "arrow-select 52.2.0", "base64 0.22.0", - "brotli", + "brotli 6.0.0", "bytes", "chrono", "flate2", @@ -8738,20 +8815,20 @@ dependencies = [ [[package]] name = "parquet" -version = "53.0.0" +version = "53.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0fbf928021131daaa57d334ca8e3904fe9ae22f73c56244fc7db9b04eedc3d8" +checksum = "dea02606ba6f5e856561d8d507dba8bac060aefca2a6c0f1aa1d361fed91ff3e" dependencies = [ "ahash 0.8.11", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", - "arrow-data 53.0.0", - "arrow-ipc 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-array 53.2.0", + "arrow-buffer 53.2.0", + "arrow-cast 53.2.0", + "arrow-data 53.2.0", + "arrow-ipc 53.2.0", + "arrow-schema 53.2.0", + "arrow-select 53.2.0", "base64 0.22.0", - "brotli", + "brotli 7.0.0", "bytes", "chrono", "flate2", @@ -10472,7 +10549,7 @@ dependencies = [ "memcomparable", "opendal 0.49.2", "parking_lot 0.12.1", - "parquet 53.0.0", + "parquet 53.2.0", "paste", "prometheus", "prost 0.13.1", @@ -10607,13 +10684,13 @@ dependencies = [ "anyhow", "arc-swap", "arrow-array 52.2.0", - "arrow-array 53.0.0", + "arrow-array 53.2.0", "arrow-buffer 52.2.0", - "arrow-buffer 53.0.0", + "arrow-buffer 53.2.0", "arrow-cast 52.2.0", - "arrow-cast 53.0.0", + "arrow-cast 53.2.0", "arrow-schema 52.2.0", - "arrow-schema 53.0.0", + "arrow-schema 53.2.0", "async-trait", "auto_enums", "auto_impl", @@ -11007,7 +11084,7 @@ dependencies = [ "opensearch", "openssl", "parking_lot 0.12.1", - "parquet 53.0.0", + "parquet 53.2.0", "paste", "pg_bigdecimal", "postgres-openssl", diff --git a/Cargo.toml b/Cargo.toml index da6b0bfa8738..fb1c838f4409 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -139,10 +139,10 @@ prost-build = { version = "0.13" } icelake = { git = "https://github.com/risingwavelabs/icelake.git", rev = "1783f8f106958d6d0ce0249c1c708934a15c2a47", features = [ "prometheus", ] } -# branch dev -iceberg = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "f8b7bff7bd7853be7733fc1db1203862878f92c1" } -iceberg-catalog-rest = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "f8b7bff7bd7853be7733fc1db1203862878f92c1" } -iceberg-catalog-glue = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "f8b7bff7bd7853be7733fc1db1203862878f92c1" } +# branch dev-rebase-main-20241030 +iceberg = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "13a561fdcc754b6f4f872f39bb71f484570107ff" } +iceberg-catalog-rest = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "13a561fdcc754b6f4f872f39bb71f484570107ff" } +iceberg-catalog-glue = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "13a561fdcc754b6f4f872f39bb71f484570107ff" } opendal = "0.49" # used only by arrow-udf-flight arrow-flight = "53" @@ -161,7 +161,7 @@ deltalake = { version = "0.20.1", features = [ itertools = "0.13.0" jsonbb = "0.1.4" lru = { git = "https://github.com/risingwavelabs/lru-rs.git", rev = "2682b85" } -parquet = { version = "53", features = ["async"] } +parquet = { version = "53.2", features = ["async"] } thiserror-ext = "0.1.2" tikv-jemalloc-ctl = { git = "https://github.com/risingwavelabs/jemallocator.git", rev = "64a2d9" } tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [ diff --git a/src/connector/src/connector_common/iceberg/storage_catalog.rs b/src/connector/src/connector_common/iceberg/storage_catalog.rs index cd7bb2ca4ba0..12dfca377ee6 100644 --- a/src/connector/src/connector_common/iceberg/storage_catalog.rs +++ b/src/connector/src/connector_common/iceberg/storage_catalog.rs @@ -74,7 +74,7 @@ impl StorageCatalog { /// `table_path`: relative path of table dir under warehouse root. async fn is_version_hint_exist(&self, table_path: &str) -> Result { self.file_io - .is_exist(format!("{table_path}/metadata/version-hint.text").as_str()) + .exists(format!("{table_path}/metadata/version-hint.text").as_str()) .await .map_err(|err| { Error::new( @@ -308,7 +308,7 @@ impl Catalog for StorageCatalog { } }; let metadata_path = format!("{table_path}/metadata/version-hint.text"); - self.file_io.is_exist(&metadata_path).await.map_err(|err| { + self.file_io.exists(&metadata_path).await.map_err(|err| { Error::new( ErrorKind::Unexpected, format!("Failed to check if table exists: {}", err.as_report()), diff --git a/src/connector/src/source/iceberg/parquet_file_reader.rs b/src/connector/src/source/iceberg/parquet_file_reader.rs index eb98b2fdad21..5a01f2b0ed84 100644 --- a/src/connector/src/source/iceberg/parquet_file_reader.rs +++ b/src/connector/src/source/iceberg/parquet_file_reader.rs @@ -19,7 +19,7 @@ use std::sync::Arc; use anyhow::anyhow; use bytes::Bytes; use futures::future::BoxFuture; -use futures::TryFutureExt; +use futures::{FutureExt, TryFutureExt}; use iceberg::io::{ FileIOBuilder, FileMetadata, FileRead, S3_ACCESS_KEY_ID, S3_REGION, S3_SECRET_ACCESS_KEY, }; @@ -27,9 +27,9 @@ use iceberg::{Error, ErrorKind}; use opendal::layers::RetryLayer; use opendal::services::S3; use opendal::Operator; -use parquet::arrow::async_reader::{AsyncFileReader, MetadataLoader}; +use parquet::arrow::async_reader::AsyncFileReader; use parquet::arrow::ParquetRecordBatchStreamBuilder; -use parquet::file::metadata::ParquetMetaData; +use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader}; use url::Url; pub struct ParquetFileReader { @@ -53,12 +53,14 @@ impl AsyncFileReader for ParquetFileReader { } fn get_metadata(&mut self) -> BoxFuture<'_, parquet::errors::Result>> { - Box::pin(async move { - let file_size = self.meta.size; - let mut loader = MetadataLoader::load(self, file_size as usize, None).await?; - loader.load_page_index(false, false).await?; - Ok(Arc::new(loader.finish())) - }) + async move { + let reader = ParquetMetaDataReader::new(); + let size = self.meta.size as usize; + let meta = reader.load_and_finish(self, size).await?; + + Ok(Arc::new(meta)) + } + .boxed() } } From 36e7da49e25c15134e12c3348e4eb2abc4a2bad5 Mon Sep 17 00:00:00 2001 From: Dylan Chen Date: Thu, 31 Oct 2024 15:49:16 +0800 Subject: [PATCH 2/5] fix arrow type check --- Cargo.lock | 6 +++--- Cargo.toml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8155c1b3a230..e41bc748f82b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6299,7 +6299,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=13a561fdcc754b6f4f872f39bb71f484570107ff#13a561fdcc754b6f4f872f39bb71f484570107ff" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=c26805d94e82037705048ea74fc0386987d955ad#c26805d94e82037705048ea74fc0386987d955ad" dependencies = [ "anyhow", "apache-avro 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -6346,7 +6346,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-glue" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=13a561fdcc754b6f4f872f39bb71f484570107ff#13a561fdcc754b6f4f872f39bb71f484570107ff" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=c26805d94e82037705048ea74fc0386987d955ad#c26805d94e82037705048ea74fc0386987d955ad" dependencies = [ "anyhow", "async-trait", @@ -6363,7 +6363,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=13a561fdcc754b6f4f872f39bb71f484570107ff#13a561fdcc754b6f4f872f39bb71f484570107ff" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=c26805d94e82037705048ea74fc0386987d955ad#c26805d94e82037705048ea74fc0386987d955ad" dependencies = [ "async-trait", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 215699799913..c8ee32eca46b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -140,9 +140,9 @@ icelake = { git = "https://github.com/risingwavelabs/icelake.git", rev = "1783f8 "prometheus", ] } # branch dev-rebase-main-20241030 -iceberg = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "13a561fdcc754b6f4f872f39bb71f484570107ff" } -iceberg-catalog-rest = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "13a561fdcc754b6f4f872f39bb71f484570107ff" } -iceberg-catalog-glue = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "13a561fdcc754b6f4f872f39bb71f484570107ff" } +iceberg = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "c26805d94e82037705048ea74fc0386987d955ad" } +iceberg-catalog-rest = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "c26805d94e82037705048ea74fc0386987d955ad" } +iceberg-catalog-glue = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "c26805d94e82037705048ea74fc0386987d955ad" } opendal = "0.49" # used only by arrow-udf-flight arrow-flight = "53" From 82358201394e99d0a1aa3ad7ff9c85088768f6d4 Mon Sep 17 00:00:00 2001 From: Dylan Chen Date: Thu, 31 Oct 2024 23:41:51 +0800 Subject: [PATCH 3/5] revert iceberg-rust record batch reorder --- Cargo.lock | 8 ++++---- Cargo.toml | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e41bc748f82b..4d9e65b1b74d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -6299,7 +6299,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=c26805d94e82037705048ea74fc0386987d955ad#c26805d94e82037705048ea74fc0386987d955ad" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=8c486d626cc5d42badeb8e7fb2001911e1bfdb89#8c486d626cc5d42badeb8e7fb2001911e1bfdb89" dependencies = [ "anyhow", "apache-avro 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -6346,7 +6346,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-glue" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=c26805d94e82037705048ea74fc0386987d955ad#c26805d94e82037705048ea74fc0386987d955ad" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=8c486d626cc5d42badeb8e7fb2001911e1bfdb89#8c486d626cc5d42badeb8e7fb2001911e1bfdb89" dependencies = [ "anyhow", "async-trait", @@ -6363,7 +6363,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=c26805d94e82037705048ea74fc0386987d955ad#c26805d94e82037705048ea74fc0386987d955ad" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=8c486d626cc5d42badeb8e7fb2001911e1bfdb89#8c486d626cc5d42badeb8e7fb2001911e1bfdb89" dependencies = [ "async-trait", "chrono", diff --git a/Cargo.toml b/Cargo.toml index c8ee32eca46b..44bf6d9d3201 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -140,9 +140,9 @@ icelake = { git = "https://github.com/risingwavelabs/icelake.git", rev = "1783f8 "prometheus", ] } # branch dev-rebase-main-20241030 -iceberg = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "c26805d94e82037705048ea74fc0386987d955ad" } -iceberg-catalog-rest = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "c26805d94e82037705048ea74fc0386987d955ad" } -iceberg-catalog-glue = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "c26805d94e82037705048ea74fc0386987d955ad" } +iceberg = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "8c486d626cc5d42badeb8e7fb2001911e1bfdb89" } +iceberg-catalog-rest = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "8c486d626cc5d42badeb8e7fb2001911e1bfdb89" } +iceberg-catalog-glue = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "8c486d626cc5d42badeb8e7fb2001911e1bfdb89" } opendal = "0.49" # used only by arrow-udf-flight arrow-flight = "53" From 0de6a2a8aec28d920836e54506ce2fdba75bb506 Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Fri, 1 Nov 2024 13:40:15 +0800 Subject: [PATCH 4/5] fix position delete --- src/connector/src/source/iceberg/mod.rs | 26 ++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/connector/src/source/iceberg/mod.rs b/src/connector/src/source/iceberg/mod.rs index 60a26e43e1d3..60b34fb629dd 100644 --- a/src/connector/src/source/iceberg/mod.rs +++ b/src/connector/src/source/iceberg/mod.rs @@ -270,7 +270,31 @@ impl IcebergSplitEnumerator { equality_delete_files.push(IcebergFileScanTaskJsonStr::serialize(&task)); } iceberg::spec::DataContentType::PositionDeletes => { - task.project_field_ids = Vec::default(); + // Dont need to do anything here + } + } + } + + let scan = table + .scan() + .snapshot_id(snapshot_id) + .select_all() + .build() + .map_err(|e| anyhow!(e))?; + + let file_scan_stream = scan.plan_files().await.map_err(|e| anyhow!(e))?; + + #[for_await] + for task in file_scan_stream { + let task: FileScanTask = task.map_err(|e| anyhow!(e))?; + match task.data_file_content { + iceberg::spec::DataContentType::Data => { + // Dont need to do anything here + } + iceberg::spec::DataContentType::EqualityDeletes => { + // Dont need to do anything here + } + iceberg::spec::DataContentType::PositionDeletes => { position_delete_files.push(IcebergFileScanTaskJsonStr::serialize(&task)); } } From 1b304a67df1e703961edb95e08c8bd9d0fbe3fc1 Mon Sep 17 00:00:00 2001 From: xxhZs <1060434431@qq.com> Date: Fri, 1 Nov 2024 14:52:02 +0800 Subject: [PATCH 5/5] fix position del fix position del --- Cargo.lock | 6 +++--- Cargo.toml | 6 +++--- src/connector/src/source/iceberg/mod.rs | 26 +------------------------ 3 files changed, 7 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4d9e65b1b74d..010acee43cf4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6299,7 +6299,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=8c486d626cc5d42badeb8e7fb2001911e1bfdb89#8c486d626cc5d42badeb8e7fb2001911e1bfdb89" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=e28726443a57028f7c7df11d6d385470dc484d46#e28726443a57028f7c7df11d6d385470dc484d46" dependencies = [ "anyhow", "apache-avro 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -6346,7 +6346,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-glue" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=8c486d626cc5d42badeb8e7fb2001911e1bfdb89#8c486d626cc5d42badeb8e7fb2001911e1bfdb89" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=e28726443a57028f7c7df11d6d385470dc484d46#e28726443a57028f7c7df11d6d385470dc484d46" dependencies = [ "anyhow", "async-trait", @@ -6363,7 +6363,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" version = "0.3.0" -source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=8c486d626cc5d42badeb8e7fb2001911e1bfdb89#8c486d626cc5d42badeb8e7fb2001911e1bfdb89" +source = "git+https://github.com/risingwavelabs/iceberg-rust.git?rev=e28726443a57028f7c7df11d6d385470dc484d46#e28726443a57028f7c7df11d6d385470dc484d46" dependencies = [ "async-trait", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 44bf6d9d3201..9b544017c4d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -140,9 +140,9 @@ icelake = { git = "https://github.com/risingwavelabs/icelake.git", rev = "1783f8 "prometheus", ] } # branch dev-rebase-main-20241030 -iceberg = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "8c486d626cc5d42badeb8e7fb2001911e1bfdb89" } -iceberg-catalog-rest = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "8c486d626cc5d42badeb8e7fb2001911e1bfdb89" } -iceberg-catalog-glue = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "8c486d626cc5d42badeb8e7fb2001911e1bfdb89" } +iceberg = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "e28726443a57028f7c7df11d6d385470dc484d46" } +iceberg-catalog-rest = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "e28726443a57028f7c7df11d6d385470dc484d46" } +iceberg-catalog-glue = { git = "https://github.com/risingwavelabs/iceberg-rust.git", rev = "e28726443a57028f7c7df11d6d385470dc484d46" } opendal = "0.49" # used only by arrow-udf-flight arrow-flight = "53" diff --git a/src/connector/src/source/iceberg/mod.rs b/src/connector/src/source/iceberg/mod.rs index 60b34fb629dd..99055bd8fafb 100644 --- a/src/connector/src/source/iceberg/mod.rs +++ b/src/connector/src/source/iceberg/mod.rs @@ -270,31 +270,7 @@ impl IcebergSplitEnumerator { equality_delete_files.push(IcebergFileScanTaskJsonStr::serialize(&task)); } iceberg::spec::DataContentType::PositionDeletes => { - // Dont need to do anything here - } - } - } - - let scan = table - .scan() - .snapshot_id(snapshot_id) - .select_all() - .build() - .map_err(|e| anyhow!(e))?; - - let file_scan_stream = scan.plan_files().await.map_err(|e| anyhow!(e))?; - - #[for_await] - for task in file_scan_stream { - let task: FileScanTask = task.map_err(|e| anyhow!(e))?; - match task.data_file_content { - iceberg::spec::DataContentType::Data => { - // Dont need to do anything here - } - iceberg::spec::DataContentType::EqualityDeletes => { - // Dont need to do anything here - } - iceberg::spec::DataContentType::PositionDeletes => { + task.project_field_ids = vec![]; position_delete_files.push(IcebergFileScanTaskJsonStr::serialize(&task)); } }