From 0dbf749036ff3efd92b3464a5ce60728d0166319 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Mon, 8 Jan 2024 07:59:29 -0500 Subject: [PATCH 1/6] start read benchmark --- Cargo.lock | 17 +++++++ crates/hdfs-native/Cargo.toml | 9 +++- crates/hdfs-native/benches/read.rs | 71 ++++++++++++++++++++++++++++++ crates/hdfs-native/src/lib.rs | 2 +- 4 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 crates/hdfs-native/benches/read.rs diff --git a/Cargo.lock b/Cargo.lock index 9deddcc..4998b87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -302,6 +302,7 @@ dependencies = [ "ciborium", "clap", "criterion-plot", + "futures", "is-terminal", "itertools 0.10.5", "num-traits", @@ -314,6 +315,7 @@ dependencies = [ "serde_derive", "serde_json", "tinytemplate", + "tokio", "walkdir", ] @@ -448,6 +450,20 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs-hdfs3" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f38e500596a428817fd4fd8a9a21da32f4edb3250e87886039670b12ea02f5d" +dependencies = [ + "bindgen", + "cc", + "lazy_static", + "libc", + "log", + "url", +] + [[package]] name = "futures" version = "0.3.30" @@ -634,6 +650,7 @@ dependencies = [ "crc", "criterion", "env_logger", + "fs-hdfs3", "futures", "g2p", "gsasl-sys", diff --git a/crates/hdfs-native/Cargo.toml b/crates/hdfs-native/Cargo.toml index e2dffee..e7d55cb 100644 --- a/crates/hdfs-native/Cargo.toml +++ b/crates/hdfs-native/Cargo.toml @@ -37,8 +37,9 @@ prost-build = { version = "0.11", optional = true } protobuf-src = { version = "1.1", optional = true } [dev-dependencies] -criterion = "0.5" +criterion = { version = "0.5", features = ["async_tokio", "async_futures"] } env_logger = "0.10" +fs-hdfs3 = "0.1.12" serial_test = "2.0.0" tempfile = "3" which = "4" @@ -49,8 +50,12 @@ token = ["gsasl-sys"] generate-protobuf = ["prost-build", "protobuf-src"] integration-test = ["which"] -benchmark = [] +benchmark = ["which"] [[bench]] name = "ec" harness 
= false + +[[bench]] +name = "read" +harness = false diff --git a/crates/hdfs-native/benches/read.rs b/crates/hdfs-native/benches/read.rs new file mode 100644 index 0000000..3dfa987 --- /dev/null +++ b/crates/hdfs-native/benches/read.rs @@ -0,0 +1,71 @@ +use std::collections::HashSet; + +use bytes::{BufMut, BytesMut}; +use criterion::*; +use hdfs::hdfs::{get_hdfs, get_hdfs_by_full_path}; +use hdfs_native::{minidfs::MiniDfs, Client, WriteOptions}; + +async fn write_file(client: &Client, ints: usize) { + let mut writer = client + .create("/bench", WriteOptions::default()) + .await + .unwrap(); + + let mut data = BytesMut::with_capacity(ints * 4); + for i in 0..ints { + data.put_u32(i as u32); + } + writer.write(data.freeze()).await.unwrap(); + writer.close().await.unwrap(); +} + +fn bench(c: &mut Criterion) { + let _ = env_logger::builder().is_test(true).try_init(); + + let _dfs = MiniDfs::with_features(&HashSet::new()); + let client = Client::default(); + + let ints_to_write: usize = 32 * 1024 * 1024; // 128 MiB file + + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + + rt.block_on(async { write_file(&client, ints_to_write).await }); + + let fs = get_hdfs_by_full_path(&_dfs.url).unwrap(); + + let mut group = c.benchmark_group("read"); + group.throughput(Throughput::Bytes((ints_to_write * 4) as u64)); + group.sample_size(10); + + let reader = rt.block_on(client.read("/bench")).unwrap(); + group.bench_function("read-native", |b| { + b.to_async(&rt).iter(|| async { + // let reader = client.read("/bench").await.unwrap(); + + reader.read_range(0, reader.file_length()).await.unwrap() + }) + }); + group.sample_size(10); + group.bench_function("read-libhdfs", |b| { + b.iter(|| { + let mut buf = BytesMut::zeroed(ints_to_write * 4); + let mut bytes_read = 0; + let reader = fs.open("/bench").unwrap(); + + while bytes_read < ints_to_write * 4 { + bytes_read += reader + .read(&mut buf[bytes_read..ints_to_write * 4]) + .unwrap() as 
usize; + } + reader.close().unwrap(); + println!("{:?}", &buf[ints_to_write * 4 - 16..ints_to_write * 4]); + buf + }) + }); +} + +criterion_group!(benches, bench); +criterion_main!(benches); diff --git a/crates/hdfs-native/src/lib.rs b/crates/hdfs-native/src/lib.rs index 74c62d2..e447943 100644 --- a/crates/hdfs-native/src/lib.rs +++ b/crates/hdfs-native/src/lib.rs @@ -37,7 +37,7 @@ pub(crate) mod ec; pub(crate) mod error; pub mod file; pub(crate) mod hdfs; -#[cfg(feature = "integration-test")] +#[cfg(any(feature = "integration-test", feature = "benchmark"))] pub mod minidfs; pub(crate) mod proto; pub(crate) mod security; From ec341031b07b8677d3a0a33e190237e7090d2e12 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Mon, 22 Jan 2024 07:13:24 -0500 Subject: [PATCH 2/6] Trying to figure out the difference --- crates/hdfs-native/benches/read.rs | 41 +++++++++++-------- .../minidfs/src/main/java/main/Main.java | 1 + .../src/main/resources/log4j.properties | 2 +- crates/hdfs-native/src/hdfs/block_reader.rs | 9 ++-- crates/hdfs-native/src/hdfs/connection.rs | 14 +++++++ crates/hdfs-native/src/minidfs.rs | 4 +- 6 files changed, 49 insertions(+), 22 deletions(-) diff --git a/crates/hdfs-native/benches/read.rs b/crates/hdfs-native/benches/read.rs index 3dfa987..29f491c 100644 --- a/crates/hdfs-native/benches/read.rs +++ b/crates/hdfs-native/benches/read.rs @@ -34,7 +34,15 @@ fn bench(c: &mut Criterion) { rt.block_on(async { write_file(&client, ints_to_write).await }); - let fs = get_hdfs_by_full_path(&_dfs.url).unwrap(); + let fs = get_hdfs().unwrap(); + println!( + "{:?}", + fs.list_status("/") + .unwrap() + .into_iter() + .map(|s| s.name().to_string()) + .collect::>() + ); let mut group = c.benchmark_group("read"); group.throughput(Throughput::Bytes((ints_to_write * 4) as u64)); @@ -49,22 +57,23 @@ fn bench(c: &mut Criterion) { }) }); group.sample_size(10); - group.bench_function("read-libhdfs", |b| { - b.iter(|| { - let mut buf = BytesMut::zeroed(ints_to_write * 4); - 
let mut bytes_read = 0; - let reader = fs.open("/bench").unwrap(); + // group.bench_function("read-libhdfs", |b| { + // b.iter(|| { + // let mut buf = BytesMut::zeroed(ints_to_write * 4); + // let mut bytes_read = 0; + // println!("{}", fs.used().unwrap()); + // let reader = fs.open("/bench").unwrap(); - while bytes_read < ints_to_write * 4 { - bytes_read += reader - .read(&mut buf[bytes_read..ints_to_write * 4]) - .unwrap() as usize; - } - reader.close().unwrap(); - println!("{:?}", &buf[ints_to_write * 4 - 16..ints_to_write * 4]); - buf - }) - }); + // while bytes_read < ints_to_write * 4 { + // bytes_read += reader + // .read(&mut buf[bytes_read..ints_to_write * 4]) + // .unwrap() as usize; + // } + // reader.close().unwrap(); + // println!("{:?}", &buf[ints_to_write * 4 - 16..ints_to_write * 4]); + // buf + // }) + // }); } criterion_group!(benches, bench); diff --git a/crates/hdfs-native/minidfs/src/main/java/main/Main.java b/crates/hdfs-native/minidfs/src/main/java/main/Main.java index 410577d..dd4aaeb 100644 --- a/crates/hdfs-native/minidfs/src/main/java/main/Main.java +++ b/crates/hdfs-native/minidfs/src/main/java/main/Main.java @@ -160,6 +160,7 @@ public static void main(String args[]) throws Exception { } } + hdfsConf.set("dfs.client.socketcache.capacity", "0"); hdfsConf.writeXml(new FileOutputStream("target/test/core-site.xml")); System.out.println("Ready!"); diff --git a/crates/hdfs-native/minidfs/src/main/resources/log4j.properties b/crates/hdfs-native/minidfs/src/main/resources/log4j.properties index 0893a26..bdcb9e4 100644 --- a/crates/hdfs-native/minidfs/src/main/resources/log4j.properties +++ b/crates/hdfs-native/minidfs/src/main/resources/log4j.properties @@ -1,5 +1,5 @@ # Define the root logger to the system property "hadoop.root.logger". 
-log4j.rootLogger=INFO,console +log4j.rootLogger=DEBUG,console log4j.appender.console=org.apache.log4j.ConsoleAppender log4j.appender.console.target=System.err diff --git a/crates/hdfs-native/src/hdfs/block_reader.rs b/crates/hdfs-native/src/hdfs/block_reader.rs index 1f09996..8f2729d 100644 --- a/crates/hdfs-native/src/hdfs/block_reader.rs +++ b/crates/hdfs-native/src/hdfs/block_reader.rs @@ -93,13 +93,16 @@ impl ReplicatedBlockStream { } async fn next_packet(&mut self) -> Result> { - if self.len == 0 { - return Ok(None); - } if self.connection.is_none() { self.select_next_datanode().await?; } let conn = self.connection.as_mut().unwrap(); + + if self.len == 0 { + conn.send_read_success().await?; + return Ok(None); + } + let packet = conn.read_packet().await?; let packet_offset = if self.offset > packet.header.offset_in_block as usize { diff --git a/crates/hdfs-native/src/hdfs/connection.rs b/crates/hdfs-native/src/hdfs/connection.rs index 3879fb6..7072010 100644 --- a/crates/hdfs-native/src/hdfs/connection.rs +++ b/crates/hdfs-native/src/hdfs/connection.rs @@ -587,6 +587,20 @@ impl DatanodeConnection { Ok(Packet::new(header, checksum, data)) } + pub(crate) async fn send_read_success(&mut self) -> Result<()> { + let client_read_status = hdfs::ClientReadStatusProto { + status: hdfs::Status::ChecksumOk as i32, + }; + + self.stream + .write_all(&client_read_status.encode_length_delimited_to_vec()) + .await?; + self.stream.flush().await?; + self.stream.shutdown().await?; + + Ok(()) + } + pub(crate) fn split(self) -> (DatanodeReader, DatanodeWriter) { let (reader, writer) = self.stream.into_inner().into_split(); let reader = DatanodeReader { diff --git a/crates/hdfs-native/src/minidfs.rs b/crates/hdfs-native/src/minidfs.rs index 2a4decd..2e93d04 100644 --- a/crates/hdfs-native/src/minidfs.rs +++ b/crates/hdfs-native/src/minidfs.rs @@ -67,7 +67,7 @@ impl MiniDfs { ]) .stdin(Stdio::piped()) .stdout(Stdio::piped()) - .stderr(Stdio::null()) + // .stderr(Stdio::null()) 
.spawn() .unwrap(); @@ -130,7 +130,7 @@ impl MiniDfs { "hdfs://127.0.0.1:9000" }; - env::set_var("HADOOP_CONF_DIR", "target/test"); + // env::set_var("HADOOP_CONF_DIR", "target/test"); MiniDfs { process: child, From 701f32d90f15008f4a9ac080817682f64e4f1c03 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Wed, 14 Feb 2024 19:29:06 -0500 Subject: [PATCH 3/6] Add benchmarks --- crates/hdfs-native/Cargo.toml | 8 +- crates/hdfs-native/benches/io.rs | 112 ++++++++++++++++++ crates/hdfs-native/benches/read.rs | 80 ------------- crates/hdfs-native/benches/rpc.rs | 41 +++++++ .../minidfs/src/main/java/main/Main.java | 1 - .../src/main/resources/log4j.properties | 2 +- crates/hdfs-native/src/minidfs.rs | 2 +- 7 files changed, 161 insertions(+), 85 deletions(-) create mode 100644 crates/hdfs-native/benches/io.rs delete mode 100644 crates/hdfs-native/benches/read.rs create mode 100644 crates/hdfs-native/benches/rpc.rs diff --git a/crates/hdfs-native/Cargo.toml b/crates/hdfs-native/Cargo.toml index ca8794c..f13240c 100644 --- a/crates/hdfs-native/Cargo.toml +++ b/crates/hdfs-native/Cargo.toml @@ -26,7 +26,7 @@ prost-types = "0.12" roxmltree = "0.18" socket2 = "0.5" thiserror = "1" -tokio = { workspace = true, features = ["rt", "net", "io-util", "macros", "sync", "time"] } +tokio = { workspace = true, features = ["rt", "rt-multi-thread", "net", "io-util", "macros", "sync", "time"] } url = "2" users = { version = "0.11", default-features = false } uuid = { version = "1", features = ["v4"] } @@ -57,5 +57,9 @@ name = "ec" harness = false [[bench]] -name = "read" +name = "io" harness = false + +[[bench]] +name = "rpc" +harness = false \ No newline at end of file diff --git a/crates/hdfs-native/benches/io.rs b/crates/hdfs-native/benches/io.rs new file mode 100644 index 0000000..f43093f --- /dev/null +++ b/crates/hdfs-native/benches/io.rs @@ -0,0 +1,112 @@ +use std::collections::HashSet; + +use bytes::{Buf, BufMut, BytesMut}; +use criterion::*; +use hdfs::hdfs::get_hdfs; +use 
hdfs_native::{minidfs::MiniDfs, Client, WriteOptions}; + +async fn write_file(client: &Client, ints: usize) { + let mut writer = client + .create("/bench", WriteOptions::default()) + .await + .unwrap(); + + let mut data = BytesMut::with_capacity(ints * 4); + for i in 0..ints { + data.put_u32(i as u32); + } + writer.write(data.freeze()).await.unwrap(); + writer.close().await.unwrap(); +} + +fn bench(c: &mut Criterion) { + let _ = env_logger::builder().is_test(true).try_init(); + + let _dfs = MiniDfs::with_features(&HashSet::new()); + let client = Client::default(); + + let ints_to_write: usize = 128 * 1024 * 1024; // 128 Mi ints * 4 bytes = 512 MiB file + + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + rt.block_on(async { write_file(&client, ints_to_write).await }); + + let fs = get_hdfs().unwrap(); + + let mut group = c.benchmark_group("read"); + group.throughput(Throughput::Bytes((ints_to_write * 4) as u64)); + group.sample_size(10); + + let reader = rt.block_on(client.read("/bench")).unwrap(); + group.bench_function("read-native", |b| { + b.to_async(&rt).iter(|| async { + // let reader = client.read("/bench").await.unwrap(); + + reader.read_range(0, reader.file_length()).await.unwrap() + }) + }); + group.sample_size(10); + group.bench_function("read-libhdfs", |b| { + b.iter(|| { + let mut buf = BytesMut::zeroed(ints_to_write * 4); + let mut bytes_read = 0; + let fs = get_hdfs().unwrap(); + println!("{}", fs.used().unwrap()); + let reader = fs.open("/bench").unwrap(); + + while bytes_read < ints_to_write * 4 { + bytes_read += reader + .read(&mut buf[bytes_read..ints_to_write * 4]) + .unwrap() as usize; + } + reader.close().unwrap(); + println!("{:?}", &buf[ints_to_write * 4 - 16..ints_to_write * 4]); + buf + }) + }); + + let mut data_to_write = BytesMut::with_capacity(ints_to_write * 4); + for i in 0..ints_to_write { + data_to_write.put_i32(i as i32); + } + + let buf = data_to_write.freeze(); + + drop(group); + + let mut group = 
c.benchmark_group("write"); + group.throughput(Throughput::Bytes((ints_to_write * 4) as u64)); + group.sample_size(10); + + group.bench_function("write-native", |b| { + b.to_async(&rt).iter(|| async { + let mut writer = client + .create("/bench-write", WriteOptions::default().overwrite(true)) + .await + .unwrap(); + + writer.write(buf.clone()).await.unwrap(); + writer.close().await.unwrap(); + }) + }); + + group.sample_size(10); + group.bench_function("write-libhdfs", |b| { + b.iter(|| { + let mut buf = buf.clone(); + let writer = fs.create_with_overwrite("/bench-write", true).unwrap(); + + while buf.remaining() > 0 { + let written = writer.write(&buf[..]).unwrap(); + buf.advance(written as usize); + } + writer.close().unwrap(); + }) + }); +} + +criterion_group!(benches, bench); +criterion_main!(benches); diff --git a/crates/hdfs-native/benches/read.rs b/crates/hdfs-native/benches/read.rs deleted file mode 100644 index 29f491c..0000000 --- a/crates/hdfs-native/benches/read.rs +++ /dev/null @@ -1,80 +0,0 @@ -use std::collections::HashSet; - -use bytes::{BufMut, BytesMut}; -use criterion::*; -use hdfs::hdfs::{get_hdfs, get_hdfs_by_full_path}; -use hdfs_native::{minidfs::MiniDfs, Client, WriteOptions}; - -async fn write_file(client: &Client, ints: usize) { - let mut writer = client - .create("/bench", WriteOptions::default()) - .await - .unwrap(); - - let mut data = BytesMut::with_capacity(ints * 4); - for i in 0..ints { - data.put_u32(i as u32); - } - writer.write(data.freeze()).await.unwrap(); - writer.close().await.unwrap(); -} - -fn bench(c: &mut Criterion) { - let _ = env_logger::builder().is_test(true).try_init(); - - let _dfs = MiniDfs::with_features(&HashSet::new()); - let client = Client::default(); - - let ints_to_write: usize = 32 * 1024 * 1024; // 128 MiB file - - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .unwrap(); - - rt.block_on(async { write_file(&client, ints_to_write).await }); - - let fs = 
get_hdfs().unwrap(); - println!( - "{:?}", - fs.list_status("/") - .unwrap() - .into_iter() - .map(|s| s.name().to_string()) - .collect::>() - ); - - let mut group = c.benchmark_group("read"); - group.throughput(Throughput::Bytes((ints_to_write * 4) as u64)); - group.sample_size(10); - - let reader = rt.block_on(client.read("/bench")).unwrap(); - group.bench_function("read-native", |b| { - b.to_async(&rt).iter(|| async { - // let reader = client.read("/bench").await.unwrap(); - - reader.read_range(0, reader.file_length()).await.unwrap() - }) - }); - group.sample_size(10); - // group.bench_function("read-libhdfs", |b| { - // b.iter(|| { - // let mut buf = BytesMut::zeroed(ints_to_write * 4); - // let mut bytes_read = 0; - // println!("{}", fs.used().unwrap()); - // let reader = fs.open("/bench").unwrap(); - - // while bytes_read < ints_to_write * 4 { - // bytes_read += reader - // .read(&mut buf[bytes_read..ints_to_write * 4]) - // .unwrap() as usize; - // } - // reader.close().unwrap(); - // println!("{:?}", &buf[ints_to_write * 4 - 16..ints_to_write * 4]); - // buf - // }) - // }); -} - -criterion_group!(benches, bench); -criterion_main!(benches); diff --git a/crates/hdfs-native/benches/rpc.rs b/crates/hdfs-native/benches/rpc.rs new file mode 100644 index 0000000..0b7cd29 --- /dev/null +++ b/crates/hdfs-native/benches/rpc.rs @@ -0,0 +1,41 @@ +use std::collections::HashSet; + +use criterion::*; +use hdfs::hdfs::get_hdfs; +use hdfs_native::{minidfs::MiniDfs, Client, WriteOptions}; + +fn bench(c: &mut Criterion) { + let _ = env_logger::builder().is_test(true).try_init(); + + let _dfs = MiniDfs::with_features(&HashSet::new()); + let client = Client::default(); + + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + rt.block_on(async { + client + .create("/bench", WriteOptions::default()) + .await + .unwrap() + .close() + .await + .unwrap(); + }); + + let fs = get_hdfs().unwrap(); + + let mut group = 
c.benchmark_group("rpc"); + group.bench_function("getFileInfo-native", |b| { + b.to_async(&rt) + .iter(|| async { client.get_file_info("/bench").await.unwrap() }) + }); + group.bench_function("getFileInfo-libhdfs", |b| { + b.iter(|| fs.get_file_status("/bench").unwrap()) + }); +} + +criterion_group!(benches, bench); +criterion_main!(benches); diff --git a/crates/hdfs-native/minidfs/src/main/java/main/Main.java b/crates/hdfs-native/minidfs/src/main/java/main/Main.java index dd4aaeb..410577d 100644 --- a/crates/hdfs-native/minidfs/src/main/java/main/Main.java +++ b/crates/hdfs-native/minidfs/src/main/java/main/Main.java @@ -160,7 +160,6 @@ public static void main(String args[]) throws Exception { } } - hdfsConf.set("dfs.client.socketcache.capacity", "0"); hdfsConf.writeXml(new FileOutputStream("target/test/core-site.xml")); System.out.println("Ready!"); diff --git a/crates/hdfs-native/minidfs/src/main/resources/log4j.properties b/crates/hdfs-native/minidfs/src/main/resources/log4j.properties index bdcb9e4..0893a26 100644 --- a/crates/hdfs-native/minidfs/src/main/resources/log4j.properties +++ b/crates/hdfs-native/minidfs/src/main/resources/log4j.properties @@ -1,5 +1,5 @@ # Define the root logger to the system property "hadoop.root.logger". 
-log4j.rootLogger=DEBUG,console +log4j.rootLogger=INFO,console log4j.appender.console=org.apache.log4j.ConsoleAppender log4j.appender.console.target=System.err diff --git a/crates/hdfs-native/src/minidfs.rs b/crates/hdfs-native/src/minidfs.rs index 2e93d04..6c919ab 100644 --- a/crates/hdfs-native/src/minidfs.rs +++ b/crates/hdfs-native/src/minidfs.rs @@ -67,7 +67,7 @@ impl MiniDfs { ]) .stdin(Stdio::piped()) .stdout(Stdio::piped()) - // .stderr(Stdio::null()) + .stderr(Stdio::null()) .spawn() .unwrap(); From 27a057746203fa5b8042115585b8e1ad987fe38a Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Wed, 14 Feb 2024 19:33:33 -0500 Subject: [PATCH 4/6] Remove prints --- crates/hdfs-native/benches/io.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/hdfs-native/benches/io.rs b/crates/hdfs-native/benches/io.rs index f43093f..16094a4 100644 --- a/crates/hdfs-native/benches/io.rs +++ b/crates/hdfs-native/benches/io.rs @@ -54,7 +54,6 @@ fn bench(c: &mut Criterion) { let mut buf = BytesMut::zeroed(ints_to_write * 4); let mut bytes_read = 0; let fs = get_hdfs().unwrap(); - println!("{}", fs.used().unwrap()); let reader = fs.open("/bench").unwrap(); while bytes_read < ints_to_write * 4 { @@ -63,7 +62,6 @@ fn bench(c: &mut Criterion) { .unwrap() as usize; } reader.close().unwrap(); - println!("{:?}", &buf[ints_to_write * 4 - 16..ints_to_write * 4]); buf }) }); From 4c93b5f77032d9eb494edd8f4f21bc8107030fbb Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Wed, 14 Feb 2024 21:55:30 -0500 Subject: [PATCH 5/6] Add env var back --- crates/hdfs-native/src/minidfs.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/hdfs-native/src/minidfs.rs b/crates/hdfs-native/src/minidfs.rs index 6c919ab..8bc0389 100644 --- a/crates/hdfs-native/src/minidfs.rs +++ b/crates/hdfs-native/src/minidfs.rs @@ -130,8 +130,7 @@ impl MiniDfs { "hdfs://127.0.0.1:9000" }; - // env::set_var("HADOOP_CONF_DIR", "target/test"); - + env::set_var("HADOOP_CONF_DIR", 
"target/test"); MiniDfs { process: child, url: url.to_string(), From 7e00ad3451b72c428f9e6e5fe40eb44c57d1046f Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Thu, 15 Feb 2024 08:01:50 -0500 Subject: [PATCH 6/6] Update readme --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 73d656d..1c2e8ff 100644 --- a/README.md +++ b/README.md @@ -71,4 +71,17 @@ cargo test -p hdfs-native --features token,kerberos,intergation-test ``` ### Python tests -See the [Python README](./python/README.md) \ No newline at end of file +See the [Python README](./python/README.md) + +## Running benchmarks +Some of the benchmarks compare performance to the JVM based client through libhdfs via the fs-hdfs3 crate. Because of that, some extra setup is required to run the benchmarks: + +```bash +export HADOOP_CONF_DIR=$(pwd)/crates/hdfs-native/target/test +export CLASSPATH=$(hadoop classpath) +``` + +then you can run the benchmarks with +```bash +cargo bench -p hdfs-native --features benchmark,integration-test +``` \ No newline at end of file