diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs index 09ba0cd892b9..7edf9cb7c5cc 100644 --- a/arrow/src/array/equal/mod.rs +++ b/arrow/src/array/equal/mod.rs @@ -304,10 +304,10 @@ mod tests { use std::sync::Arc; use crate::array::{ - array::Array, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait, BooleanArray, - DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray, - Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray, - StringDictionaryBuilder, StringOffsetSizeTrait, StructArray, + array::Array, ArrayData, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait, + BooleanArray, DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder, + GenericBinaryArray, Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, + StringArray, StringDictionaryBuilder, StringOffsetSizeTrait, StructArray, }; use crate::array::{GenericStringArray, Int32Array}; use crate::buffer::Buffer; @@ -1297,4 +1297,38 @@ mod tests { ); test_equal(&a, &b, false); } + + #[test] + fn test_non_null_empty_strings() { + let s = StringArray::from(vec![Some(""), Some(""), Some("")]); + + let string1 = s.data(); + + let string2 = ArrayData::builder(DataType::Utf8) + .len(string1.len()) + .buffers(string1.buffers().to_vec()) + .build() + .unwrap(); + + // string2 is identical to string1 except that it has no validity buffer but since there + // are no nulls, string1 and string2 are equal + test_equal(string1, &string2, true); + } + + #[test] + fn test_null_empty_strings() { + let s = StringArray::from(vec![Some(""), None, Some("")]); + + let string1 = s.data(); + + let string2 = ArrayData::builder(DataType::Utf8) + .len(string1.len()) + .buffers(string1.buffers().to_vec()) + .build() + .unwrap(); + + // string2 is identical to string1 except that it has no validity buffer since string1 has + // nulls in it, string1 and string2 are not equal + test_equal(string1, &string2, false); + } } diff --git a/arrow/src/array/equal/variable_size.rs b/arrow/src/array/equal/variable_size.rs index ecb3bc2a3c20..946f107f3971 100644 --- a/arrow/src/array/equal/variable_size.rs +++ b/arrow/src/array/equal/variable_size.rs @@ -86,13 +86,13 @@ pub(super) fn variable_sized_equal( let lhs_pos = lhs_start + i; let rhs_pos = rhs_start + i; - // the null bits can still be `None`, so we don't unwrap + // the null bits can still be `None`, indicating that the value is valid. let lhs_is_null = !lhs_nulls .map(|v| get_bit(v.as_slice(), lhs.offset() + lhs_pos)) - .unwrap_or(false); + .unwrap_or(true); let rhs_is_null = !rhs_nulls .map(|v| get_bit(v.as_slice(), rhs.offset() + rhs_pos)) - .unwrap_or(false); + .unwrap_or(true); lhs_is_null || (lhs_is_null == rhs_is_null)