From 854ed60350a3c87fa95ebd8f7ce9cded8d41551c Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Tue, 3 Sep 2024 23:17:48 +0200 Subject: [PATCH] fix: coalesce schema issues closes #12307 --- datafusion/core/src/dataframe/mod.rs | 37 +++++++ datafusion/expr/src/expr_schema.rs | 23 ++-- datafusion/expr/src/logical_plan/builder.rs | 4 +- datafusion/functions/src/core/coalesce.rs | 30 ++++- .../functions/src/datetime/to_local_time.rs | 57 +++++----- datafusion/functions/src/encoding/inner.rs | 103 ++++++++++-------- datafusion/functions/src/unicode/strpos.rs | 36 +++--- datafusion/functions/src/unicode/substr.rs | 77 ++++++++++--- .../sqllogictest/test_files/encoding.slt | 8 +- .../sqllogictest/test_files/functions.slt | 61 +++++++++++ datafusion/sqllogictest/test_files/scalar.slt | 2 +- datafusion/sqllogictest/test_files/select.slt | 19 +++- .../test_files/string/string_literal.slt | 4 +- .../sqllogictest/test_files/timestamps.slt | 2 +- 14 files changed, 335 insertions(+), 128 deletions(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index b52dcaa2f7fd..72b763ce0f2b 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -2029,6 +2029,43 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_coalesce_schema() -> Result<()> { + let ctx = SessionContext::new(); + + let query = r#"SELECT COALESCE(null, 5)"#; + + let result = ctx.sql(query).await?; + assert_logical_expr_schema_eq_physical_expr_schema(result).await?; + Ok(()) + } + + #[tokio::test] + async fn test_coalesce_from_values_schema() -> Result<()> { + let ctx = SessionContext::new(); + + let query = r#"SELECT COALESCE(column1, column2) FROM VALUES (null, 1.2)"#; + + let result = ctx.sql(query).await?; + assert_logical_expr_schema_eq_physical_expr_schema(result).await?; + Ok(()) + } + + #[tokio::test] + async fn test_coalesce_from_values_schema_multiple_rows() -> Result<()> { + let ctx = SessionContext::new(); + + let query = r#"SELECT COALESCE(column1, column2) + FROM VALUES + (null, 1.2), + (1.1, null), + (2, 5);"#; + + let result = ctx.sql(query).await?; + assert_logical_expr_schema_eq_physical_expr_schema(result).await?; + Ok(()) + } + #[tokio::test] async fn test_array_agg_schema() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index f40ac409dd43..ad617c53d617 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -151,21 +151,22 @@ impl ExprSchemable for Expr { .collect::>>()?; // verify that function is invoked with correct number and type of arguments as defined in `TypeSignature` - data_types_with_scalar_udf(&arg_data_types, func).map_err(|err| { - plan_datafusion_err!( - "{} {}", - err, - utils::generate_signature_error_msg( - func.name(), - func.signature().clone(), - &arg_data_types, + let new_data_types = data_types_with_scalar_udf(&arg_data_types, func) + .map_err(|err| { + plan_datafusion_err!( + "{} {}", + err, + utils::generate_signature_error_msg( + func.name(), + func.signature().clone(), + &arg_data_types, + ) ) - ) - })?; + })?; // perform additional function arguments validation (due to limited // expressiveness of `TypeSignature`), then infer return type - Ok(func.return_type_from_exprs(args, schema, &arg_data_types)?) + Ok(func.return_type_from_exprs(args, schema, &new_data_types)?) } Expr::WindowFunction(window_function) => self .data_type_and_nullable_with_window_function(schema, window_function) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index c38ce2d02fd2..dba7c2e98378 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -216,7 +216,9 @@ impl LogicalPlanBuilder { common_type = Some(data_type); } } - field_types.push(common_type.unwrap_or(DataType::Utf8)); + // assuming common_type was not set, and no error, therefore the type should be NULL + // since the code loop skips NULL + field_types.push(common_type.unwrap_or(DataType::Null)); } // wrap cast if data type is not same as common type. for row in &mut values { diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs index 19db58c181e7..2fa6d7c197ad 100644 --- a/datafusion/functions/src/core/coalesce.rs +++ b/datafusion/functions/src/core/coalesce.rs @@ -21,11 +21,11 @@ use arrow::array::{new_null_array, BooleanArray}; use arrow::compute::kernels::zip::zip; use arrow::compute::{and, is_not_null, is_null}; use arrow::datatypes::DataType; - use datafusion_common::{exec_err, ExprSchema, Result}; use datafusion_expr::type_coercion::binary::type_union_resolution; use datafusion_expr::{ColumnarValue, Expr, ExprSchemable}; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use itertools::Itertools; #[derive(Debug)] pub struct CoalesceFunc { @@ -60,12 +60,16 @@ impl ScalarUDFImpl for CoalesceFunc { } fn return_type(&self, arg_types: &[DataType]) -> Result { - Ok(arg_types[0].clone()) + Ok(arg_types + .iter() + .find_or_first(|d| !d.is_null()) + .unwrap() + .clone()) } - // If all the element in coalesce is non-null, the result is non-null + // If any the arguments in coalesce is non-null, the result is non-null fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool { - args.iter().any(|e| e.nullable(schema).ok().unwrap_or(true)) + args.iter().all(|e| e.nullable(schema).ok().unwrap_or(true)) } /// coalesce evaluates to the first value which is not NULL @@ -154,4 +158,22 @@ mod test { .unwrap(); assert_eq!(return_type, DataType::Date32); } + + #[test] + fn test_coalesce_return_types_with_nulls_first() { + let coalesce = core::coalesce::CoalesceFunc::new(); + let return_type = coalesce + .return_type(&[DataType::Null, DataType::Date32]) + .unwrap(); + assert_eq!(return_type, DataType::Date32); + } + + #[test] + fn test_coalesce_return_types_with_nulls_last() { + let coalesce = core::coalesce::CoalesceFunc::new(); + let return_type = coalesce + .return_type(&[DataType::Int64, DataType::Null]) + .unwrap(); + assert_eq!(return_type, DataType::Int64); + } } diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index 634e28e6f393..0e33da14547e 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -22,22 +22,16 @@ use std::sync::Arc; use arrow::array::timezone::Tz; use arrow::array::{Array, ArrayRef, PrimitiveBuilder}; use arrow::datatypes::DataType::Timestamp; +use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; use arrow::datatypes::{ ArrowTimestampType, DataType, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }; -use arrow::datatypes::{ - TimeUnit, - TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}, -}; use chrono::{DateTime, MappedLocalTime, Offset, TimeDelta, TimeZone, Utc}; use datafusion_common::cast::as_primitive_array; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; -use datafusion_expr::TypeSignature::Exact; -use datafusion_expr::{ - ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, -}; +use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue}; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; /// A UDF function that converts a timezone-aware timestamp to local time (with no offset or /// timezone information). In other words, this function strips off the timezone from the timestamp, @@ -55,20 +49,8 @@ impl Default for ToLocalTimeFunc { impl ToLocalTimeFunc { pub fn new() -> Self { - let base_sig = |array_type: TimeUnit| { - [ - Exact(vec![Timestamp(array_type, None)]), - Exact(vec![Timestamp(array_type, Some(TIMEZONE_WILDCARD.into()))]), - ] - }; - - let full_sig = [Nanosecond, Microsecond, Millisecond, Second] - .into_iter() - .flat_map(base_sig) - .collect::>(); - Self { - signature: Signature::one_of(full_sig, Volatility::Immutable), + signature: Signature::user_defined(Volatility::Immutable), } } @@ -328,13 +310,10 @@ impl ScalarUDFImpl for ToLocalTimeFunc { } match &arg_types[0] { - Timestamp(Nanosecond, _) => Ok(Timestamp(Nanosecond, None)), - Timestamp(Microsecond, _) => Ok(Timestamp(Microsecond, None)), - Timestamp(Millisecond, _) => Ok(Timestamp(Millisecond, None)), - Timestamp(Second, _) => Ok(Timestamp(Second, None)), + Timestamp(timeunit, _) => Ok(Timestamp(*timeunit, None)), _ => exec_err!( "The to_local_time function can only accept timestamp as the arg, got {:?}", arg_types[0] - ), + ) } } @@ -348,6 +327,30 @@ impl ScalarUDFImpl for ToLocalTimeFunc { self.to_local_time(args) } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + if arg_types.len() != 1 { + return plan_err!( + "to_local_time function requires 1 argument, got {:?}", + arg_types.len() + ); + } + + let first_arg = arg_types[0].clone(); + match &first_arg { + Timestamp(Nanosecond, timezone) => { + Ok(vec![Timestamp(Nanosecond, timezone.clone())]) + } + Timestamp(Microsecond, timezone) => { + Ok(vec![Timestamp(Microsecond, timezone.clone())]) + } + Timestamp(Millisecond, timezone) => { + Ok(vec![Timestamp(Millisecond, timezone.clone())]) + } + Timestamp(Second, timezone) => Ok(vec![Timestamp(Second, timezone.clone())]), + _ => plan_err!("The to_local_time function can only accept Timestamp as the arg got {first_arg}"), + } + } } #[cfg(test)] diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs index d9ce299a2602..5b80c908cfc3 100644 --- a/datafusion/functions/src/encoding/inner.rs +++ b/datafusion/functions/src/encoding/inner.rs @@ -32,7 +32,6 @@ use datafusion_expr::ColumnarValue; use std::sync::Arc; use std::{fmt, str::FromStr}; -use datafusion_expr::TypeSignature::*; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; use std::any::Any; @@ -49,17 +48,8 @@ impl Default for EncodeFunc { impl EncodeFunc { pub fn new() -> Self { - use DataType::*; Self { - signature: Signature::one_of( - vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![Binary, Utf8]), - Exact(vec![LargeBinary, Utf8]), - ], - Volatility::Immutable, - ), + signature: Signature::user_defined(Volatility::Immutable), } } } @@ -77,23 +67,39 @@ impl ScalarUDFImpl for EncodeFunc { } fn return_type(&self, arg_types: &[DataType]) -> Result { - use DataType::*; - - Ok(match arg_types[0] { - Utf8 => Utf8, - LargeUtf8 => LargeUtf8, - Binary => Utf8, - LargeBinary => LargeUtf8, - Null => Null, - _ => { - return plan_err!("The encode function can only accept utf8 or binary."); - } - }) + Ok(arg_types[0].to_owned()) } fn invoke(&self, args: &[ColumnarValue]) -> Result { encode(args) } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + if arg_types.len() != 2 { + return plan_err!( + "{} expects to get 2 arguments, but got {}", + self.name(), + arg_types.len() + ); + } + + if arg_types[1] != DataType::Utf8 { + return Err(DataFusionError::Plan("2nd argument should be Utf8".into())); + } + + match arg_types[0] { + DataType::Utf8 | DataType::Binary | DataType::Null => { + Ok(vec![DataType::Utf8; 2]) + } + DataType::LargeUtf8 | DataType::LargeBinary => { + Ok(vec![DataType::LargeUtf8, DataType::Utf8]) + } + _ => plan_err!( + "1st argument should be Utf8 or Binary or Null, got {:?}", + arg_types[0] + ), + } + } } #[derive(Debug)] @@ -109,17 +115,8 @@ impl Default for DecodeFunc { impl DecodeFunc { pub fn new() -> Self { - use DataType::*; Self { - signature: Signature::one_of( - vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![Binary, Utf8]), - Exact(vec![LargeBinary, Utf8]), - ], - Volatility::Immutable, - ), + signature: Signature::user_defined(Volatility::Immutable), } } } @@ -137,23 +134,39 @@ impl ScalarUDFImpl for DecodeFunc { } fn return_type(&self, arg_types: &[DataType]) -> Result { - use DataType::*; - - Ok(match arg_types[0] { - Utf8 => Binary, - LargeUtf8 => LargeBinary, - Binary => Binary, - LargeBinary => LargeBinary, - Null => Null, - _ => { - return plan_err!("The decode function can only accept utf8 or binary."); - } - }) + Ok(arg_types[0].to_owned()) } fn invoke(&self, args: &[ColumnarValue]) -> Result { decode(args) } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + if arg_types.len() != 2 { + return plan_err!( + "{} expects to get 2 arguments, but got {}", + self.name(), + arg_types.len() + ); + } + + if arg_types[1] != DataType::Utf8 { + return plan_err!("2nd argument should be Utf8"); + } + + match arg_types[0] { + DataType::Utf8 | DataType::Binary | DataType::Null => { + Ok(vec![DataType::Binary, DataType::Utf8]) + } + DataType::LargeUtf8 | DataType::LargeBinary => { + Ok(vec![DataType::LargeBinary, DataType::Utf8]) + } + _ => plan_err!( + "1st argument should be Utf8 or Binary or Null, got {:?}", + arg_types[0] + ), + } + } } #[derive(Debug, Copy, Clone)] diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index 6da67c8a2798..3879f779eb71 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -23,8 +23,7 @@ use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; use crate::string::common::StringArrayType; use crate::utils::{make_scalar_function, utf8_to_int_type}; -use datafusion_common::{exec_err, Result}; -use datafusion_expr::TypeSignature::Exact; +use datafusion_common::{exec_err, plan_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; #[derive(Debug)] @@ -41,20 +40,8 @@ impl Default for StrposFunc { impl StrposFunc { pub fn new() -> Self { - use DataType::*; Self { - signature: Signature::one_of( - vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![Utf8, LargeUtf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![LargeUtf8, LargeUtf8]), - Exact(vec![Utf8View, Utf8View]), - Exact(vec![Utf8View, Utf8]), - Exact(vec![Utf8View, LargeUtf8]), - ], - Volatility::Immutable, - ), + signature: Signature::user_defined(Volatility::Immutable), aliases: vec![String::from("instr"), String::from("position")], } } @@ -84,6 +71,25 @@ impl ScalarUDFImpl for StrposFunc { fn aliases(&self) -> &[String] { &self.aliases } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + match arg_types { + [first, second ] => { + match (first, second) { + (DataType::LargeUtf8 | DataType::Utf8View | DataType::Utf8, DataType::LargeUtf8 | DataType::Utf8View | DataType::Utf8) => Ok(arg_types.to_vec()), + (DataType::Null, DataType::Null) => Ok(vec![DataType::Utf8, DataType::Utf8]), + (DataType::Null, _) => Ok(vec![DataType::Utf8, second.to_owned()]), + (_, DataType::Null) => Ok(vec![first.to_owned(), DataType::Utf8]), + (DataType::Dictionary(_, value_type), DataType::LargeUtf8 | DataType::Utf8View | DataType::Utf8) => match **value_type { + DataType::LargeUtf8 | DataType::Utf8View | DataType::Utf8 | DataType::Null | DataType::Binary => Ok(vec![*value_type.clone(), second.to_owned()]), + _ => plan_err!("The STRPOS/INSTR/POSITION function can only accept strings, but got {:?}.", **value_type), + }, + _ => plan_err!("The STRPOS/INSTR/POSITION function can only accept strings, but got {:?}.", arg_types) + } + }, + _ => plan_err!("The STRPOS/INSTR/POSITION function can only accept strings, but got {:?}", arg_types) + } + } } fn strpos(args: &[ArrayRef]) -> Result { diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 934fa4bfe135..205de0b30b9c 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -27,8 +27,7 @@ use arrow::array::{ use arrow::datatypes::DataType; use arrow_buffer::{NullBufferBuilder, ScalarBuffer}; use datafusion_common::cast::as_int64_array; -use datafusion_common::{exec_err, Result}; -use datafusion_expr::TypeSignature::Exact; +use datafusion_common::{exec_err, plan_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; #[derive(Debug)] @@ -45,19 +44,8 @@ impl Default for SubstrFunc { impl SubstrFunc { pub fn new() -> Self { - use DataType::*; Self { - signature: Signature::one_of( - vec![ - Exact(vec![Utf8, Int64]), - Exact(vec![LargeUtf8, Int64]), - Exact(vec![Utf8, Int64, Int64]), - Exact(vec![LargeUtf8, Int64, Int64]), - Exact(vec![Utf8View, Int64]), - Exact(vec![Utf8View, Int64, Int64]), - ], - Volatility::Immutable, - ), + signature: Signature::user_defined(Volatility::Immutable), aliases: vec![String::from("substring")], } } @@ -91,6 +79,65 @@ impl ScalarUDFImpl for SubstrFunc { fn aliases(&self) -> &[String] { &self.aliases } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + let first_data_type = match &arg_types[0] { + DataType::Null => Ok(DataType::Utf8), + DataType::LargeUtf8 | DataType::Utf8View | DataType::Utf8 => Ok(arg_types[0].clone()), + DataType::Dictionary(key_type, value_type) => { + if key_type.is_integer() { + match value_type.as_ref() { + DataType::Null => Ok(DataType::Utf8), + DataType::LargeUtf8 | DataType::Utf8View | DataType::Utf8 => Ok(*value_type.clone()), + _ => plan_err!( + "The first argument of the {} function can only be a string, but got {:?}.", + self.name(), + arg_types[0] + ), + } + } else { + plan_err!( + "The first argument of the {} function can only be a string, but got {:?}.", + self.name(), + arg_types[0] + ) + } + } + _ => plan_err!( + "The first argument of the {} function can only be a string, but got {:?}.", + self.name(), + arg_types[0] + ) + }?; + + if ![DataType::Int64, DataType::Int32, DataType::Null].contains(&arg_types[1]) { + return plan_err!( + "The second argument of the {} function can only be an integer, but got {:?}.", + self.name(), + arg_types[1] + ); + } + + if arg_types.len() == 3 + && ![DataType::Int64, DataType::Int32, DataType::Null].contains(&arg_types[2]) + { + return plan_err!( + "The third argument of the {} function can only be an integer, but got {:?}.", + self.name(), + arg_types[2] + ); + } + + if arg_types.len() == 2 { + Ok(vec![first_data_type.to_owned(), DataType::Int64]) + } else { + Ok(vec![ + first_data_type.to_owned(), + DataType::Int64, + DataType::Int64, + ]) + } + } } /// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).) @@ -394,7 +441,7 @@ where ) } else { if start == i64::MIN { - return exec_err!("negative overflow when calculating skip value") + return exec_err!("negative overflow when calculating skip value"); } let (start, end) = get_true_start_end( string, diff --git a/datafusion/sqllogictest/test_files/encoding.slt b/datafusion/sqllogictest/test_files/encoding.slt index 7a6ac5ca7121..68bdf78115aa 100644 --- a/datafusion/sqllogictest/test_files/encoding.slt +++ b/datafusion/sqllogictest/test_files/encoding.slt @@ -28,14 +28,14 @@ CREATE TABLE test( ; # errors -query error DataFusion error: Error during planning: The encode function can only accept utf8 or binary\. -select encode(12, 'hex') +query error 1st argument should be Utf8 or Binary or Null, got Int64 +select encode(12, 'hex'); query error DataFusion error: Error during planning: There is no built\-in encoding named 'non_encoding', currently supported encodings are: base64, hex select encode(bin_field, 'non_encoding') from test; -query error DataFusion error: Error during planning: The decode function can only accept utf8 or binary\. -select decode(12, 'hex') +query error 1st argument should be Utf8 or Binary or Null, got Int64 +select decode(12, 'hex'); query error DataFusion error: Error during planning: There is no built\-in encoding named 'non_encoding', currently supported encodings are: base64, hex select decode(hex_field, 'non_encoding') from test; diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 228d7ca2f046..e887b1934e04 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -442,6 +442,67 @@ SELECT strpos('joséésoj', NULL) ---- NULL +query T +SELECT substr('alphabet', -3) +---- +alphabet + +query T +SELECT substr('alphabet', 0) +---- +alphabet + +query T +SELECT substr('alphabet', 1) +---- +alphabet + +query T +SELECT substr('alphabet', 2) +---- +lphabet + +query T +SELECT substr('alphabet', 3) +---- +phabet + +query T +SELECT substr('alphabet', 30) +---- +(empty) + +query T +SELECT substr('alphabet', CAST(NULL AS int)) +---- +NULL + +query T +SELECT substr('alphabet', 3, 2) +---- +ph + +query T +SELECT substr('alphabet', 3, 20) +---- +phabet + +query T +SELECT substr('alphabet', CAST(NULL AS int), 20) +---- +NULL + +query T +SELECT substr('alphabet', 3, CAST(NULL AS int)) +---- +NULL + +statement error The first argument of the substr function can only be a string, but got Int64 +SELECT substr(1, 3) + +statement error The first argument of the substr function can only be a string, but got Int64 +SELECT substr(1, 3, 4) + query T SELECT translate('12345', '143', 'ax') ---- diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 6eed72e914bd..3b9c9a16042c 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1907,7 +1907,7 @@ select position('' in '') 1 -query error DataFusion error: Execution error: The STRPOS/INSTR/POSITION function can only accept strings, but got Int64. +query error POSITION function can only accept strings select position(1 in 1) diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 3286ae32d4d8..5df5f313af3c 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -374,7 +374,7 @@ NULL a NULL b 3 c -query TT +query ?T VALUES (NULL,'a'),(NULL,'b'),(NULL,'c') ---- NULL a @@ -395,7 +395,7 @@ VALUES (1,NULL),(2,NULL),(3,'c') 2 NULL 3 c -query IIIIIIIIIIIIITTR +query IIIIIIIIIIIII?TR VALUES (1,2,3,4,5,6,7,8,9,10,11,12,13,NULL,'F',3.5) ---- 1 2 3 4 5 6 7 8 9 10 11 12 13 NULL F 3.5 @@ -483,6 +483,21 @@ CREATE TABLE foo2(c1 double, c2 double) AS VALUES (2, 5), (3, 6); +query T +SELECT arrow_typeof(COALESCE(column1, column2)) FROM VALUES (null, 1.2); +---- +Float64 + +# multiple rows and columns with null need type coercion +query TTT +select arrow_typeof(column1), arrow_typeof(column2), arrow_typeof(column3) from (SELECT column1, column2, column3 FROM VALUES +(null, 2, 'a'), +(1.1, null, 'b'), +(2, 5, null)) LIMIT 1; +---- +Float64 Int64 Utf8 + + # foo distinct query T select distinct '1' from foo; diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt b/datafusion/sqllogictest/test_files/string/string_literal.slt index 7cb921a6603e..24e03fdb7184 100644 --- a/datafusion/sqllogictest/test_files/string/string_literal.slt +++ b/datafusion/sqllogictest/test_files/string/string_literal.slt @@ -123,10 +123,10 @@ SELECT substr('Hello🌏世界', 5, 3) ---- o🌏世 -statement error The SUBSTR function can only accept strings, but got Int64. +statement error The first argument of the substr function can only be a string, but got Int64 SELECT substr(1, 3) -statement error The SUBSTR function can only accept strings, but got Int64. +statement error The first argument of the substr function can only be a string, but got Int64 SELECT substr(1, 3, 4) statement error Execution error: negative substring length not allowed diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 4b11e338da70..7a7a8a8703ec 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -2858,7 +2858,7 @@ statement error select to_local_time('2024-04-01T00:00:20Z'::timestamp, 'some string'); # invalid argument data type -statement error DataFusion error: Execution error: The to_local_time function can only accept timestamp as the arg, got Utf8 +statement error The to_local_time function can only accept Timestamp as the arg got Utf8 select to_local_time('2024-04-01T00:00:20Z'); # invalid timezone