diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index b90aeffb0769..7e3871e6b795 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -204,6 +204,11 @@ config_namespace! { /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. pub dialect: String, default = "generic".to_string() + /// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but + /// ignore the length. If false, error if a `VARCHAR` with a length is + /// specified. The Arrow type system does not have a notion of maximum + /// string length and thus DataFusion can not enforce such limits. + pub support_varchar_with_length: bool, default = true } } @@ -303,6 +308,7 @@ config_namespace! { /// statistics into the same file groups. /// Currently experimental pub split_file_groups_by_statistics: bool, default = false + } } diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 2b7867e72046..0b880ddbf81b 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -615,6 +615,7 @@ impl SessionState { ParserOptions { parse_float_as_decimal: sql_parser_options.parse_float_as_decimal, enable_ident_normalization: sql_parser_options.enable_ident_normalization, + support_varchar_with_length: sql_parser_options.support_varchar_with_length, } } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 63ef86446aaf..00f221200624 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -103,6 +103,7 @@ pub trait ContextProvider { pub struct ParserOptions { pub parse_float_as_decimal: bool, pub enable_ident_normalization: bool, + pub support_varchar_with_length: bool, } impl Default for ParserOptions { @@ -110,6 +111,7 @@ impl Default for ParserOptions { Self { parse_float_as_decimal: false, enable_ident_normalization: true, + support_varchar_with_length: true, } } } @@ -404,12 +406,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::UnsignedInt(_) | SQLDataType::UnsignedInteger(_) | SQLDataType::UnsignedInt4(_) => { Ok(DataType::UInt32) } + SQLDataType::Varchar(length) => { + match (length, self.options.support_varchar_with_length) { + (Some(_), false) => plan_err!("does not support Varchar with length, please set `support_varchar_with_length` to be true"), + _ => Ok(DataType::Utf8), + } + } SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) => Ok(DataType::UInt64), SQLDataType::Float(_) => Ok(DataType::Float32), SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32), SQLDataType::Double | SQLDataType::DoublePrecision | SQLDataType::Float8 => Ok(DataType::Float64), SQLDataType::Char(_) - | SQLDataType::Varchar(_) | SQLDataType::Text | SQLDataType::String(_) => Ok(DataType::Utf8), SQLDataType::Timestamp(None, tz_info) => { diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index f196d71d41de..e72a439b323b 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -84,6 +84,7 @@ fn parse_decimals() { ParserOptions { parse_float_as_decimal: true, enable_ident_normalization: false, + support_varchar_with_length: false, }, ); } @@ -137,6 +138,7 @@ fn parse_ident_normalization() { ParserOptions { parse_float_as_decimal: false, enable_ident_normalization, + support_varchar_with_length: false, }, ); if plan.is_ok() { diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 6f31973fdb6f..3cc837aa8ee9 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -237,6 +237,7 @@ datafusion.optimizer.top_down_join_key_reordering true datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true datafusion.sql_parser.parse_float_as_decimal false +datafusion.sql_parser.support_varchar_with_length true # show all variables with verbose query TTT rowsort @@ -318,6 +319,7 @@ datafusion.optimizer.top_down_join_key_reordering true When set to true, the phy datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type +datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. # show_variable_in_config_options query TT diff --git a/datafusion/sqllogictest/test_files/strings.slt b/datafusion/sqllogictest/test_files/strings.slt index 27ed0e2d0983..3cd6c339b44f 100644 --- a/datafusion/sqllogictest/test_files/strings.slt +++ b/datafusion/sqllogictest/test_files/strings.slt @@ -78,3 +78,52 @@ e1 p2 p2e1 p2m1e1 + +## VARCHAR with length support + +# Lengths can be used by default +query T +SELECT '12345'::VARCHAR(2); +---- +12345 + +# Lengths can not be used when the config setting is disabled + +statement ok +set datafusion.sql_parser.support_varchar_with_length = false; + +query error +SELECT '12345'::VARCHAR(2); + +query error +SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s); + +statement ok +create table vals(s char) as values('abc'), ('def'); + +query error +SELECT s::VARCHAR(2) FROM vals + +# Lengths can be used when the config setting is enabled + +statement ok +set datafusion.sql_parser.support_varchar_with_length = true; + +query T +SELECT '12345'::VARCHAR(2) +---- +12345 + +query T +SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s) +---- +12345 + +query T +SELECT s::VARCHAR(2) FROM vals +---- +abc +def + +statement ok +drop table vals; diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 80d88632ffdb..c5f22725e0a3 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -113,3 +113,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type | | datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) | | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. | +| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |