From 25c34f9ecc225aa6e38a1534ab362a239135b703 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Mon, 16 Sep 2024 19:32:54 +0100 Subject: [PATCH] cleanup `array_has` (#12460) * cleanup "array_has" * revert array_has_dispatch_for_scalar to use vec --- datafusion/functions-nested/src/array_has.rs | 62 ++++++++------------ 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index dec964df2129..8f8d123bf5f9 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -96,44 +96,33 @@ impl ScalarUDFImpl for ArrayHas { } fn invoke(&self, args: &[ColumnarValue]) -> Result { - // Always return null if the second argumet is null - // i.e. array_has(array, null) -> null - if let ColumnarValue::Scalar(s) = &args[1] { - if s.is_null() { - return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None))); + match &args[1] { + ColumnarValue::Array(array_needle) => { + // the needle is already an array, convert the haystack to an array of the same length + let haystack = args[0].to_owned().into_array(array_needle.len())?; + let array = array_has_inner_for_array(&haystack, array_needle)?; + Ok(ColumnarValue::Array(array)) } - } - - // first, identify if any of the arguments is an Array. If yes, store its `len`, - // as any scalar will need to be converted to an array of len `len`. - let len = args - .iter() - .fold(Option::::None, |acc, arg| match arg { - ColumnarValue::Scalar(_) => acc, - ColumnarValue::Array(a) => Some(a.len()), - }); - - let is_scalar = len.is_none(); - - let result = match args[1] { - ColumnarValue::Array(_) => { - let args = ColumnarValue::values_to_arrays(args)?; - array_has_inner_for_array(&args[0], &args[1]) - } - ColumnarValue::Scalar(_) => { + ColumnarValue::Scalar(scalar_needle) => { + // Always return null if the second argument is null + // i.e. array_has(array, null) -> null + if scalar_needle.is_null() { + return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None))); + } + + // since the needle is a scalar, convert it to an array of size 1 let haystack = args[0].to_owned().into_array(1)?; - let needle = args[1].to_owned().into_array(1)?; + let needle = scalar_needle.to_array_of_size(1)?; let needle = Scalar::new(needle); - array_has_inner_for_scalar(&haystack, &needle) + let array = array_has_inner_for_scalar(&haystack, &needle)?; + if let ColumnarValue::Scalar(_) = &args[0] { + // If both inputs are scalar, keeps output as scalar + let scalar_value = ScalarValue::try_from_array(&array, 0)?; + Ok(ColumnarValue::Scalar(scalar_value)) + } else { + Ok(ColumnarValue::Array(array)) + } } - }; - - if is_scalar { - // If all inputs are scalar, keeps output as scalar - let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); - result.map(ColumnarValue::Scalar) - } else { - result.map(ColumnarValue::Array) } } @@ -218,10 +207,9 @@ fn array_has_dispatch_for_scalar( } let sliced_array = eq_array.slice(start, length); // For nested list, check number of nulls - if sliced_array.null_count() == length { - continue; + if sliced_array.null_count() != length { + final_contained[i] = Some(sliced_array.true_count() > 0); } - final_contained[i] = Some(sliced_array.true_count() > 0); } Ok(Arc::new(BooleanArray::from(final_contained)))