From 3c61372d7ac76bba149316b1fbad6e981752e502 Mon Sep 17 00:00:00 2001
From: Jk Xu <54522439+Dousir9@users.noreply.github.com>
Date: Wed, 18 Oct 2023 15:26:47 +0800
Subject: [PATCH 1/2] fix bitmap new_trued (#1580)

---
 src/bitmap/immutable.rs      |  2 +-
 tests/it/bitmap/immutable.rs | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/bitmap/immutable.rs b/src/bitmap/immutable.rs
index 57502e911e..6883d3312f 100644
--- a/src/bitmap/immutable.rs
+++ b/src/bitmap/immutable.rs
@@ -307,7 +307,7 @@ impl Bitmap {
         // just set each byte to u8::MAX
         // we will not access data with index >= length
         let bytes = vec![0b11111111u8; length.saturating_add(7) / 8];
-        unsafe { Bitmap::from_inner_unchecked(Arc::new(bytes.into()), 0, length, length) }
+        unsafe { Bitmap::from_inner_unchecked(Arc::new(bytes.into()), 0, length, 0) }
     }
 
     /// Counts the nulls (unset bits) starting from `offset` bits and for `length` bits.
diff --git a/tests/it/bitmap/immutable.rs b/tests/it/bitmap/immutable.rs
index 5e6157413e..cc003009e0 100644
--- a/tests/it/bitmap/immutable.rs
+++ b/tests/it/bitmap/immutable.rs
@@ -32,6 +32,25 @@ fn as_slice_offset_middle() {
     assert_eq!(length, 5);
 }
 
+#[test]
+fn new_constant() {
+    let b = Bitmap::new_constant(true, 9);
+    let (slice, offset, length) = b.as_slice();
+    assert_eq!(slice[0], 0b11111111);
+    assert!((slice[1] & 0b00000001) > 0);
+    assert_eq!(offset, 0);
+    assert_eq!(length, 9);
+    assert_eq!(b.unset_bits(), 0);
+
+    let b = Bitmap::new_constant(false, 9);
+    let (slice, offset, length) = b.as_slice();
+    assert_eq!(slice[0], 0b00000000);
+    assert!((slice[1] & 0b00000001) == 0);
+    assert_eq!(offset, 0);
+    assert_eq!(length, 9);
+    assert_eq!(b.unset_bits(), 9);
+}
+
 #[test]
 fn debug() {
     let b = Bitmap::from([true, true, false, true, true, true, true, true, true]);

From 9a26422d00b83c65245f75e02eb436dedd91b5b8 Mon Sep 17 00:00:00 2001
From: sundyli <543950155@qq.com>
Date: Thu, 19 Oct 2023 17:19:15 -0700
Subject: [PATCH 2/2] chore: add max bytes_estimate to reserve the capacity of binary (#1581)

---
 src/io/parquet/read/deserialize/binary/utils.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/io/parquet/read/deserialize/binary/utils.rs b/src/io/parquet/read/deserialize/binary/utils.rs
index ec514766fa..a48063c56e 100644
--- a/src/io/parquet/read/deserialize/binary/utils.rs
+++ b/src/io/parquet/read/deserialize/binary/utils.rs
@@ -48,7 +48,8 @@ impl Binary {
         if self.offsets.len_proxy() == 100 && self.offsets.capacity() > 100 {
             let bytes_per_row = self.values.len() / 100 + 1;
             let bytes_estimate = bytes_per_row * self.offsets.capacity();
-            if bytes_estimate > self.values.capacity() {
+
+            if bytes_estimate > self.values.capacity() && bytes_estimate < 10 * 1024 * 1024 {
                 self.values.reserve(bytes_estimate - self.values.capacity());
             }
         }