Skip to content

Commit

Permalink
rewrite Bitpack::encode
Browse files Browse the repository at this point in the history
  • Loading branch information
jdcasale committed Apr 29, 2024
1 parent f9423c8 commit 15644d8
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 25 deletions.
32 changes: 32 additions & 0 deletions vortex-fastlanes/src/bitpacking/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,38 @@ impl EncodingCompression for BitPackedEncoding {
}
}

/// Bit-packs `array`, picking the packed width from the array's bit-width
/// frequency statistic.
///
/// When the selected width equals the native bit width of the array's
/// primitive type, packing cannot save anything and the input is returned
/// unchanged (converted to a static array). Otherwise the values are packed
/// at the selected width and, if any exceptions were counted for that width,
/// a patches array is built alongside the packed data.
///
/// # Errors
///
/// Fails if the `BitWidthFreq` statistic cannot be computed, if packing
/// fails, or if the `BitPackedArray` cannot be constructed.
pub(crate) fn bitpack_encode(array: PrimitiveArray<'_>) -> VortexResult<OwnedArray> {
    let freqs = array
        .statistics()
        .compute_as::<ListScalarVec<usize>>(Stat::BitWidthFreq)
        .ok_or_else(|| vortex_err!("Could not compute bit width frequencies"))?
        .0;

    let native_width = array.ptype().bit_width();
    let bit_width = best_bit_width(&freqs, bytes_per_exception(array.ptype()));
    let num_exceptions = count_exceptions(bit_width, &freqs);

    // Packing at the full native width gains nothing: hand the array back as-is.
    if bit_width == native_width {
        return Ok(array.into_array().to_static());
    }

    let packed = bitpack(&array, bit_width)?;
    // Patches are only materialised when at least one exception was counted.
    let patches =
        (num_exceptions > 0).then(|| bitpack_patches(&array, bit_width, num_exceptions));

    let encoded = BitPackedArray::try_new(
        packed,
        array.validity(),
        patches,
        bit_width,
        array.dtype().clone(),
        array.len(),
    )?;
    Ok(encoded.into_array())
}

pub(crate) fn bitpack(parray: &PrimitiveArray, bit_width: usize) -> VortexResult<OwnedArray> {
// We know the min is > 0, so it's safe to re-interpret signed integers as unsigned.
// TODO(ngates): we should implement this using a vortex cast to centralize this hack.
Expand Down
42 changes: 24 additions & 18 deletions vortex-fastlanes/src/bitpacking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,22 +123,12 @@ impl BitPackedArray<'_> {
))
}

pub fn encode(
parray: &PrimitiveArray<'_>,
validity: Validity,
patches: Option<Array>,
bit_width: usize,
) -> VortexResult<OwnedArray> {
let packed = bitpack(parray, bit_width)?;
BitPackedArray::try_new(
packed,
validity,
patches,
bit_width,
parray.dtype().clone(),
parray.len(),
)
.map(|a| a.into_array())
/// Bit-packs `array`, which must be convertible into a `PrimitiveArray`.
///
/// The packing itself (width selection and any patches) is delegated to
/// `bitpack_encode`.
///
/// # Errors
///
/// Fails with "Bitpacking can only encode primitive arrays" when the input
/// cannot be converted to a `PrimitiveArray`, and propagates any error
/// returned by `bitpack_encode`.
pub fn encode(array: Array<'_>) -> VortexResult<OwnedArray> {
    match PrimitiveArray::try_from(array) {
        Ok(parray) => bitpack_encode(parray),
        Err(_) => {
            vortex_bail!("Bitpacking can only encode primitive arrays");
        }
    }
}
}

Expand Down Expand Up @@ -212,9 +202,10 @@ mod test {
use vortex::compress::{CompressConfig, CompressCtx};
use vortex::compute::scalar_at::scalar_at;
use vortex::compute::slice::slice;
use vortex::encoding::EncodingRef;
use vortex::encoding::{ArrayEncoding, EncodingRef};
use vortex::IntoArray;

use crate::BitPackedEncoding;
use crate::{BitPackedArray, BitPackedEncoding};

#[test]
fn slice_within_block() {
Expand Down Expand Up @@ -267,4 +258,19 @@ mod test {
((9215 % 63) as u8).into()
);
}

/// Encodes a nullable `u64` array through `BitPackedArray::encode` and checks
/// that the flattened result matches the input values.
#[test]
fn test_encode() {
    // Alternating valid/null slots, ending with a value that needs all 64 bits
    // (presumably forcing at least one exception; confirm against
    // `best_bit_width`).
    let input = PrimitiveArray::from_nullable_vec(vec![
        Some(1),
        None,
        Some(1),
        None,
        Some(1),
        None,
        Some(u64::MAX),
    ]);

    let encoded = BitPackedArray::encode(input.into_array()).unwrap();
    assert_eq!(encoded.encoding().id(), BitPackedEncoding.id());

    // Null slots are expected to read back as 0 in the flattened data.
    let decoded = encoded
        .flatten_primitive()
        .unwrap()
        .typed_data::<u64>()
        .to_vec();
    let expected = &[1, 0, 1, 0, 1, 0, u64::MAX];
    assert_eq!(decoded, expected);
}
}
11 changes: 4 additions & 7 deletions vortex-ipc/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,6 @@ mod tests {
use vortex::array::primitive::{Primitive, PrimitiveArray, PrimitiveEncoding};
use vortex::encoding::{ArrayEncoding, EncodingId};
use vortex::ptype::NativePType;
use vortex::validity::Validity;
use vortex::{Array, ArrayDType, ArrayDef, IntoArray, OwnedArray, SerdeContext};
use vortex_alp::{ALPArray, ALPEncoding};
use vortex_error::VortexResult;
Expand Down Expand Up @@ -441,9 +440,10 @@ mod tests {
.collect_vec(),
)
.into_array();
let apl_encoded = ALPArray::encode(pdata).unwrap();
let alp_encoded = ALPArray::encode(pdata).unwrap();
assert_eq!(alp_encoded.encoding().id(), ALPEncoding.id());
test_base_case(
&apl_encoded,
&alp_encoded,
&[
2999989.5f64,
2999988.5,
Expand Down Expand Up @@ -485,11 +485,8 @@ mod tests {
fn test_write_read_bitpacked() {
// NB: the order is reversed here to ensure we aren't grabbing indexes instead of values
let uncompressed = PrimitiveArray::from((0i64..3_000).rev().collect_vec());
// NB: bit_width here must be >= 2^ceil(log2(MAX_VALUE)) for correct packing w/o patches
let packed = BitPackedArray::encode(&uncompressed, Validity::AllValid, None, 12).unwrap();

let packed = BitPackedArray::encode(uncompressed.into_array()).unwrap();
assert_eq!(packed.encoding().id(), BitPackedEncoding.id());

let indices = PrimitiveArray::from(vec![1i32, 2, 3, 4, 5, 6, 7, 7, 7, 8]).into_array();
let array = test_read_write_inner(&packed, &indices).unwrap();
let expected = &[2998, 2997, 2996, 2995, 2994, 2993, 2992, 2992, 2992, 2991];
Expand Down

0 comments on commit 15644d8

Please sign in to comment.