From da228654176ae7fffdbb167cf5bf851e3f45a7e9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 8 Nov 2023 09:07:54 -0700 Subject: [PATCH 1/4] remove duplicate version numbers for arrow, object_store, and parquet dependencies --- Cargo.toml | 4 ++-- datafusion-examples/Cargo.toml | 2 +- datafusion/common/Cargo.toml | 2 +- datafusion/proto/Cargo.toml | 2 +- datafusion/wasmtest/Cargo.toml | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 39ebd1fa59b5..e7a4126743f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,9 +79,9 @@ indexmap = "2.0.0" itertools = "0.11" log = "^0.4" num_cpus = "1.13.0" -object_store = "0.7.0" +object_store = { version = "0.7.0", default-features = false } parking_lot = "0.12" -parquet = { version = "48.0.0", features = ["arrow", "async", "object_store"] } +parquet = { version = "48.0.0", default-features = false, features = ["arrow", "async", "object_store"] } rand = "0.8" rstest = "0.18.0" serde_json = "1" diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 57691520a401..676b4aaa78c0 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -46,7 +46,7 @@ futures = { workspace = true } log = { workspace = true } mimalloc = { version = "0.1", default-features = false } num_cpus = { workspace = true } -object_store = { version = "0.7.0", features = ["aws", "http"] } +object_store = { workspace = true, features = ["aws", "http"] } prost = { version = "0.12", default-features = false } prost-derive = { version = "0.11", default-features = false } serde = { version = "1.0.136", features = ["derive"] } diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index d04db86b7830..e72f477b1e34 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -47,7 +47,7 @@ arrow-schema = { workspace = true } chrono = { workspace = true } half = { version = "2.1", default-features = false } num_cpus = { workspace = true } -object_store = { version = "0.7.0", default-features = false, optional = true } +object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true } pyo3 = { version = "0.20.0", optional = true } sqlparser = { workspace = true } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index ac3439a64ca8..4dda689fff4c 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -46,7 +46,7 @@ chrono = { workspace = true } datafusion = { path = "../core", version = "33.0.0" } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } -object_store = { version = "0.7.0" } +object_store = { workspace = true } pbjson = { version = "0.5", optional = true } prost = "0.12.0" serde = { version = "1.0", optional = true } diff --git a/datafusion/wasmtest/Cargo.toml b/datafusion/wasmtest/Cargo.toml index 882b02bcc84b..c5f795d0653a 100644 --- a/datafusion/wasmtest/Cargo.toml +++ b/datafusion/wasmtest/Cargo.toml @@ -46,5 +46,5 @@ datafusion-sql = { workspace = true } # getrandom must be compiled with js feature getrandom = { version = "0.2.8", features = ["js"] } -parquet = { version = "48.0.0", default-features = false } +parquet = { workspace = true } wasm-bindgen = "0.2.87" From 07ef4effbaf3b673e4dc509f7118688013440aae Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 8 Nov 2023 09:20:23 -0700 Subject: [PATCH 2/4] cargo update --- datafusion-cli/Cargo.lock | 100 ++++++++++---------------------------- 1 file changed, 25 insertions(+), 75 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 74df8aab0175..a35320dedaec 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -46,21 +46,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "alloc-no-stdlib" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" -dependencies = [ - "alloc-no-stdlib", -] - [[package]] name = "allocator-api2" version = "0.2.16" @@ -383,7 +368,7 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -767,27 +752,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "brotli" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" -version = "2.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - [[package]] name = "bstr" version = "1.7.0" @@ -1073,7 +1037,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37e366bff8cd32dd8754b0991fb66b279dc48f598c3a18914852a6673deef583" dependencies = [ "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -1422,9 +1386,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" +checksum = "7c18ee0ed65a5f1f81cac6b1d213b69c35fa47d4252ad41f1486dbd8226fe36e" dependencies = [ "libc", "windows-sys", @@ -1572,7 +1536,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -1623,9 +1587,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if", "libc", @@ -2064,9 +2028,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" +checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" [[package]] name = "lock_api" @@ -2084,15 +2048,6 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" -[[package]] -name = "lz4_flex" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8" -dependencies = [ - "twox-hash", -] - [[package]] name = "lzma-sys" version = "0.1.20" @@ -2378,23 +2333,18 @@ dependencies = [ "arrow-schema", "arrow-select", "base64", - "brotli", "bytes", "chrono", - "flate2", "futures", "hashbrown 0.14.2", - "lz4_flex", "num", "num-bigint", "object_store", "paste", "seq-macro", - "snap", "thrift", "tokio", "twox-hash", - "zstd 0.13.0", ] [[package]] @@ -2483,7 +2433,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -2992,22 +2942,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.190" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" +checksum = "bca2a08484b285dcb282d0f67b26cadc0df8b19f8c12502c13d966bf9482f001" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.190" +version = "1.0.192" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" +checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -3183,7 +3133,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -3205,9 +3155,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.38" +version = "2.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" dependencies = [ "proc-macro2", "quote", @@ -3286,7 +3236,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -3378,7 +3328,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -3475,7 +3425,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -3520,7 +3470,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] @@ -3674,7 +3624,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", "wasm-bindgen-shared", ] @@ -3708,7 +3658,7 @@ checksum = "c5353b8dab669f5e10f5bd76df26a9360c748f054f862ff5f3f8aae0c7fb3907" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3906,7 +3856,7 @@ checksum = "c2f140bda219a26ccc0cdb03dba58af72590c53b22642577d88a927bc5c87d6b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.39", ] [[package]] From 1ccb958ae04c10403ac56c3e38553c3c413f7190 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 8 Nov 2023 09:39:40 -0700 Subject: [PATCH 3/4] use default features in parquet crate --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e7a4126743f2..9396ee4d2d39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,7 +81,7 @@ log = "^0.4" num_cpus = "1.13.0" object_store = { version = "0.7.0", default-features = false } parking_lot = "0.12" -parquet = { version = "48.0.0", default-features = false, features = ["arrow", "async", "object_store"] } +parquet = { version = "48.0.0", features = ["arrow", "async", "object_store"] } rand = "0.8" rstest = "0.18.0" serde_json = "1" From 6a2caa08e67bbb838820af83f0a1e98af5252188 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 8 Nov 2023 10:13:00 -0700 Subject: [PATCH 4/4] disable default parquet features in wasmtest --- Cargo.toml | 2 +- benchmarks/Cargo.toml | 2 +- datafusion-cli/Cargo.lock | 50 ++++++++++++++++++++++++++++++++++++ datafusion/common/Cargo.toml | 2 +- datafusion/core/Cargo.toml | 2 +- 5 files changed, 54 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9396ee4d2d39..e7a4126743f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,7 +81,7 @@ log = "^0.4" num_cpus = "1.13.0" object_store = { version = "0.7.0", default-features = false } parking_lot = "0.12" -parquet = { version = "48.0.0", features = ["arrow", "async", "object_store"] } +parquet = { version = "48.0.0", default-features = false, features = ["arrow", "async", "object_store"] } rand = "0.8" rstest = "0.18.0" serde_json = "1" diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 35f94f677d86..c5a24a0a5cf9 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -41,7 +41,7 @@ futures = { workspace = true } log = { workspace = true } mimalloc = { version = "0.1", optional = true, default-features = false } num_cpus = { workspace = true } -parquet = { workspace = true } +parquet = { workspace = true, default-features = true } serde = { version = "1.0.136", features = ["derive"] } serde_json = { workspace = true } snmalloc-rs = { version = "0.3", optional = true } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index a35320dedaec..629293e4839b 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -46,6 +46,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "allocator-api2" version = "0.2.16" @@ -752,6 +767,27 @@ dependencies = [ "generic-array", ] +[[package]] +name = "brotli" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bstr" version = "1.7.0" @@ -2048,6 +2084,15 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "lz4_flex" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8" +dependencies = [ + "twox-hash", +] + [[package]] name = "lzma-sys" version = "0.1.20" @@ -2333,18 +2378,23 @@ dependencies = [ "arrow-schema", "arrow-select", "base64", + "brotli", "bytes", "chrono", + "flate2", "futures", "hashbrown 0.14.2", + "lz4_flex", "num", "num-bigint", "object_store", "paste", "seq-macro", + "snap", "thrift", "tokio", "twox-hash", + "zstd 0.13.0", ] [[package]] diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index e72f477b1e34..b3a810153923 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -48,7 +48,7 @@ chrono = { workspace = true } half = { version = "2.1", default-features = false } num_cpus = { workspace = true } object_store = { workspace = true, optional = true } -parquet = { workspace = true, optional = true } +parquet = { workspace = true, optional = true, default-features = true } pyo3 = { version = "0.20.0", optional = true } sqlparser = { workspace = true } diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index b44914ec719f..80aec800d697 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -81,7 +81,7 @@ num-traits = { version = "0.2", optional = true } num_cpus = { workspace = true } object_store = { workspace = true } parking_lot = { workspace = true } -parquet = { workspace = true, optional = true } +parquet = { workspace = true, optional = true, default-features = true } pin-project-lite = "^0.2.7" rand = { workspace = true } sqlparser = { workspace = true }