From 1060cbf56b54bc5af5cd8553571f667ffb70760b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 4 Mar 2023 16:08:47 -0500 Subject: [PATCH 1/3] Minor: Move ObjectStoreRegistry to datafusion_execution --- datafusion/core/src/datasource/listing/mod.rs | 51 ++++++++++++++++++- datafusion/core/src/datasource/mod.rs | 4 +- datafusion/core/src/execution/runtime_env.rs | 6 ++- datafusion/execution/Cargo.toml | 3 ++ datafusion/execution/src/lib.rs | 1 + .../src}/object_store.rs | 40 --------------- 6 files changed, 61 insertions(+), 44 deletions(-) rename datafusion/{core/src/datasource => execution/src}/object_store.rs (87%) diff --git a/datafusion/core/src/datasource/listing/mod.rs b/datafusion/core/src/datasource/listing/mod.rs index 27cf14122690..d5374d1edcea 100644 --- a/datafusion/core/src/datasource/listing/mod.rs +++ b/datafusion/core/src/datasource/listing/mod.rs @@ -59,7 +59,7 @@ pub struct PartitionedFile { pub partition_values: Vec, /// An optional file range for a more fine-grained parallel execution pub range: Option, - /// An optional field for user defined per object metadata + /// An optional field for user defined per object metadata pub extensions: Option>, } @@ -103,3 +103,52 @@ impl From for PartitionedFile { } } } + +#[cfg(test)] +mod tests { + use datafusion_execution::object_store::ObjectStoreRegistry; + use object_store::local::LocalFileSystem; + + use super::*; + + #[test] + fn test_object_store_listing_url() { + let listing = ListingTableUrl::parse("file:///").unwrap(); + let store = listing.object_store(); + assert_eq!(store.as_str(), "file:///"); + + let listing = ListingTableUrl::parse("s3://bucket/").unwrap(); + let store = listing.object_store(); + assert_eq!(store.as_str(), "s3://bucket/"); + } + + #[test] + fn test_get_by_url_hdfs() { + let sut = ObjectStoreRegistry::default(); + sut.register_store("hdfs", "localhost:8020", Arc::new(LocalFileSystem::new())); + let url = ListingTableUrl::parse("hdfs://localhost:8020/key").unwrap(); + sut.get_by_url(&url).unwrap(); + } + + #[test] + fn test_get_by_url_s3() { + let sut = ObjectStoreRegistry::default(); + sut.register_store("s3", "bucket", Arc::new(LocalFileSystem::new())); + let url = ListingTableUrl::parse("s3://bucket/key").unwrap(); + sut.get_by_url(&url).unwrap(); + } + + #[test] + fn test_get_by_url_file() { + let sut = ObjectStoreRegistry::default(); + let url = ListingTableUrl::parse("file:///bucket/key").unwrap(); + sut.get_by_url(&url).unwrap(); + } + + #[test] + fn test_get_by_url_local() { + let sut = ObjectStoreRegistry::default(); + let url = ListingTableUrl::parse("../").unwrap(); + sut.get_by_url(&url).unwrap(); + } +} diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index a0d5121090ee..6fa15ab0216c 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -25,10 +25,12 @@ pub mod file_format; pub mod listing; pub mod listing_table_factory; pub mod memory; -pub mod object_store; pub mod streaming; pub mod view; +// backwards compatibility +pub use datafusion_execution::object_store; + use futures::Stream; pub use self::datasource::TableProvider; diff --git a/datafusion/core/src/execution/runtime_env.rs b/datafusion/core/src/execution/runtime_env.rs index d559e7c7fa35..6a7bb18c9862 100644 --- a/datafusion/core/src/execution/runtime_env.rs +++ b/datafusion/core/src/execution/runtime_env.rs @@ -26,9 +26,11 @@ use std::collections::HashMap; use crate::datasource::datasource::TableProviderFactory; use crate::datasource::listing_table_factory::ListingTableFactory; -use crate::datasource::object_store::ObjectStoreRegistry; -use crate::execution::memory_pool::{GreedyMemoryPool, MemoryPool, UnboundedMemoryPool}; use datafusion_common::DataFusionError; +use datafusion_execution::{ + memory_pool::{GreedyMemoryPool, MemoryPool, UnboundedMemoryPool}, + object_store::ObjectStoreRegistry, +}; use object_store::ObjectStore; use std::fmt::{Debug, Formatter}; use std::path::PathBuf; diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml index 3753d40df5fc..6c9a2d2fa65b 100644 --- a/datafusion/execution/Cargo.toml +++ b/datafusion/execution/Cargo.toml @@ -36,8 +36,11 @@ path = "src/lib.rs" [dependencies] datafusion-common = { path = "../common", version = "19.0.0" } datafusion-expr = { path = "../expr", version = "19.0.0" } +dashmap = "5.4.0" hashbrown = { version = "0.13", features = ["raw"] } log = "^0.4" +object_store = "0.5.4" parking_lot = "0.12" rand = "0.8" tempfile = "3" +url = "2.2" \ No newline at end of file diff --git a/datafusion/execution/src/lib.rs b/datafusion/execution/src/lib.rs index b8e6debb923a..55db55cf0a6a 100644 --- a/datafusion/execution/src/lib.rs +++ b/datafusion/execution/src/lib.rs @@ -17,4 +17,5 @@ pub mod disk_manager; pub mod memory_pool; +pub mod object_store; pub mod registry; diff --git a/datafusion/core/src/datasource/object_store.rs b/datafusion/execution/src/object_store.rs similarity index 87% rename from datafusion/core/src/datasource/object_store.rs rename to datafusion/execution/src/object_store.rs index 8f1fc30f971e..9c5cca84799c 100644 --- a/datafusion/core/src/datasource/object_store.rs +++ b/datafusion/execution/src/object_store.rs @@ -217,22 +217,12 @@ impl ObjectStoreRegistry { #[cfg(test)] mod tests { use super::*; - use crate::datasource::listing::ListingTableUrl; - use std::sync::Arc; #[test] fn test_object_store_url() { - let listing = ListingTableUrl::parse("file:///").unwrap(); - let store = listing.object_store(); - assert_eq!(store.as_str(), "file:///"); - let file = ObjectStoreUrl::parse("file://").unwrap(); assert_eq!(file.as_str(), "file:///"); - let listing = ListingTableUrl::parse("s3://bucket/").unwrap(); - let store = listing.object_store(); - assert_eq!(store.as_str(), "s3://bucket/"); - let url = ObjectStoreUrl::parse("s3://bucket").unwrap(); assert_eq!(url.as_str(), "s3://bucket/"); @@ -255,34 +245,4 @@ mod tests { ObjectStoreUrl::parse("s3://username:password@host:123/foo").unwrap_err(); assert_eq!(err.to_string(), "Execution error: ObjectStoreUrl must only contain scheme and authority, got: /foo"); } - - #[test] - fn test_get_by_url_hdfs() { - let sut = ObjectStoreRegistry::default(); - sut.register_store("hdfs", "localhost:8020", Arc::new(LocalFileSystem::new())); - let url = ListingTableUrl::parse("hdfs://localhost:8020/key").unwrap(); - sut.get_by_url(&url).unwrap(); - } - - #[test] - fn test_get_by_url_s3() { - let sut = ObjectStoreRegistry::default(); - sut.register_store("s3", "bucket", Arc::new(LocalFileSystem::new())); - let url = ListingTableUrl::parse("s3://bucket/key").unwrap(); - sut.get_by_url(&url).unwrap(); - } - - #[test] - fn test_get_by_url_file() { - let sut = ObjectStoreRegistry::default(); - let url = ListingTableUrl::parse("file:///bucket/key").unwrap(); - sut.get_by_url(&url).unwrap(); - } - - #[test] - fn test_get_by_url_local() { - let sut = ObjectStoreRegistry::default(); - let url = ListingTableUrl::parse("../").unwrap(); - sut.get_by_url(&url).unwrap(); - } } From ddea59a65de24dd7b7ff487af95d3a30de77c674 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 5 Mar 2023 06:51:38 -0500 Subject: [PATCH 2/3] Update cargo.lock --- datafusion-cli/Cargo.lock | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 78847f39290d..0ac7b5c7bf73 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -293,9 +293,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2" +checksum = "095183a3539c7c7649b2beb87c2d3f0591f3a7fed07761cc546d244e27e0238c" dependencies = [ "proc-macro2", "quote", @@ -749,13 +749,16 @@ dependencies = [ name = "datafusion-execution" version = "19.0.0" dependencies = [ + "dashmap", "datafusion-common", "datafusion-expr", "hashbrown 0.13.2", "log", + "object_store", "parking_lot", "rand", "tempfile", + "url", ] [[package]] @@ -1367,9 +1370,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "jobserver" @@ -1824,9 +1827,9 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "petgraph" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" +checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" dependencies = [ "fixedbitset", "indexmap", @@ -2058,9 +2061,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.8" +version = "0.36.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644" +checksum = "fd5c6ff11fecd55b40746d1995a02f2eb375bf8c00d192d521ee09f42bef37bc" dependencies = [ "bitflags", "errno", @@ -2122,9 +2125,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" [[package]] name = "same-file" @@ -2143,9 +2146,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "scratch" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" +checksum = "5d5e082f6ea090deaf0e6dd04b68360fd5cddb152af6ce8927c9d25db299f98c" [[package]] name = "sct" @@ -2268,9 +2271,9 @@ checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" [[package]] name = "socket2" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" dependencies = [ "libc", "winapi", @@ -2574,9 +2577,9 @@ checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" [[package]] name = "unicode-ident" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" +checksum = "775c11906edafc97bc378816b94585fbd9a054eabaf86fdd0ced94af449efab7" [[package]] name = "unicode-normalization" From ca801b483d86d454e9682624fd495aa43801b02d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 5 Mar 2023 07:18:37 -0500 Subject: [PATCH 3/3] tomlformat --- datafusion/execution/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml index 6c9a2d2fa65b..9e68acc7d999 100644 --- a/datafusion/execution/Cargo.toml +++ b/datafusion/execution/Cargo.toml @@ -34,13 +34,13 @@ path = "src/lib.rs" [dependencies] +dashmap = "5.4.0" datafusion-common = { path = "../common", version = "19.0.0" } datafusion-expr = { path = "../expr", version = "19.0.0" } -dashmap = "5.4.0" hashbrown = { version = "0.13", features = ["raw"] } log = "^0.4" object_store = "0.5.4" parking_lot = "0.12" rand = "0.8" tempfile = "3" -url = "2.2" \ No newline at end of file +url = "2.2"