Skip to content

Commit

Permalink
fix a bug in variable sized equality (#1209)
Browse files Browse the repository at this point in the history
A missing validity buffer was being treated as all values being null,
rather than all values being valid, causing equality to fail on some
equivalent string and binary arrays.
  • Loading branch information
helgikrs authored Jan 19, 2022
1 parent b2416ff commit 21693a4
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 7 deletions.
42 changes: 38 additions & 4 deletions arrow/src/array/equal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,10 +304,10 @@ mod tests {
use std::sync::Arc;

use crate::array::{
array::Array, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait, BooleanArray,
DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray,
Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray,
StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
array::Array, ArrayData, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait,
BooleanArray, DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder,
GenericBinaryArray, Int32Builder, ListBuilder, NullArray, PrimitiveBuilder,
StringArray, StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
};
use crate::array::{GenericStringArray, Int32Array};
use crate::buffer::Buffer;
Expand Down Expand Up @@ -1297,4 +1297,38 @@ mod tests {
);
test_equal(&a, &b, false);
}

#[test]
fn test_non_null_empty_strings() {
let s = StringArray::from(vec![Some(""), Some(""), Some("")]);

let string1 = s.data();

let string2 = ArrayData::builder(DataType::Utf8)
.len(string1.len())
.buffers(string1.buffers().to_vec())
.build()
.unwrap();

// string2 is identical to string1 except that it has no validity buffer but since there
// are no nulls, string1 and string2 are equal
test_equal(string1, &string2, true);
}

#[test]
fn test_null_empty_strings() {
let s = StringArray::from(vec![Some(""), None, Some("")]);

let string1 = s.data();

let string2 = ArrayData::builder(DataType::Utf8)
.len(string1.len())
.buffers(string1.buffers().to_vec())
.build()
.unwrap();

// string2 is identical to string1 except that it has no validity buffer since string1 has
// nulls in it, string1 and string2 are not equal
test_equal(string1, &string2, false);
}
}
6 changes: 3 additions & 3 deletions arrow/src/array/equal/variable_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,13 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
let lhs_pos = lhs_start + i;
let rhs_pos = rhs_start + i;

// the null bits can still be `None`, so we don't unwrap
// the null bits can still be `None`, indicating that the value is valid.
let lhs_is_null = !lhs_nulls
.map(|v| get_bit(v.as_slice(), lhs.offset() + lhs_pos))
.unwrap_or(false);
.unwrap_or(true);
let rhs_is_null = !rhs_nulls
.map(|v| get_bit(v.as_slice(), rhs.offset() + rhs_pos))
.unwrap_or(false);
.unwrap_or(true);

lhs_is_null
|| (lhs_is_null == rhs_is_null)
Expand Down

0 comments on commit 21693a4

Please sign in to comment.