diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs index f719a33fb624..8ea7c2951223 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/plan_to_sql.rs @@ -19,7 +19,7 @@ use datafusion::error::Result; use datafusion::prelude::*; use datafusion::sql::unparser::expr_to_sql; -use datafusion_sql::unparser::dialect::CustomDialect; +use datafusion_sql::unparser::dialect::CustomDialectBuilder; use datafusion_sql::unparser::{plan_to_sql, Unparser}; /// This example demonstrates the programmatic construction of SQL strings using @@ -80,7 +80,9 @@ fn simple_expr_to_pretty_sql_demo() -> Result<()> { /// using a custom dialect and an explicit unparser fn simple_expr_to_sql_demo_escape_mysql_style() -> Result<()> { let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8))); - let dialect = CustomDialect::new(Some('`')); + let dialect = CustomDialectBuilder::new() + .with_identifier_quote_style('`') + .build(); let unparser = Unparser::new(&dialect); let sql = unparser.expr_to_sql(&expr)?.to_string(); assert_eq!(sql, r#"((`a` < 5) OR (`a` = 8))"#); diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index e8cbde058566..eca2eb4fd0ec 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -35,7 +35,33 @@ pub trait Dialect { fn supports_nulls_first_in_sort(&self) -> bool { true } + + // Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME? + // E.g. 
Trino, Athena and Dremio do not have a DATETIME data type + fn use_timestamp_for_date64(&self) -> bool { + false + } + + fn interval_style(&self) -> IntervalStyle { + IntervalStyle::PostgresVerbose + } } + +/// `IntervalStyle` to use for unparsing +/// +/// +/// Different DBMSs follow different standards; popular ones are: +/// postgres_verbose: '2 years 15 months 100 weeks 99 hours 123456789 milliseconds' which is +/// compatible with arrow display format, as well as duckdb +/// sql standard format is '1-2' for year-month, or '1 10:10:10.123456' for day-time +/// +#[derive(Clone, Copy)] +pub enum IntervalStyle { + PostgresVerbose, + SQLStandard, + MySQL, +} + pub struct DefaultDialect {} impl Dialect for DefaultDialect { @@ -57,6 +83,10 @@ impl Dialect for PostgreSqlDialect { fn identifier_quote_style(&self, _: &str) -> Option<char> { Some('"') } + + fn interval_style(&self) -> IntervalStyle { + IntervalStyle::PostgresVerbose + } } pub struct MySqlDialect {} @@ -69,6 +99,10 @@ impl Dialect for MySqlDialect { fn supports_nulls_first_in_sort(&self) -> bool { false } + + fn interval_style(&self) -> IntervalStyle { + IntervalStyle::MySQL + } } pub struct SqliteDialect {} @@ -81,12 +115,29 @@ impl Dialect for SqliteDialect { pub struct CustomDialect { identifier_quote_style: Option<char>, + supports_nulls_first_in_sort: bool, + use_timestamp_for_date64: bool, + interval_style: IntervalStyle, +} + +impl Default for CustomDialect { + fn default() -> Self { + Self { + identifier_quote_style: None, + supports_nulls_first_in_sort: true, + use_timestamp_for_date64: false, + interval_style: IntervalStyle::PostgresVerbose, + } + } } impl CustomDialect { + // Create a `CustomDialect` with the given identifier quote style and default values for all other options + #[deprecated(note = "please use `CustomDialectBuilder` instead")] pub fn new(identifier_quote_style: Option<char>) -> Self { Self { identifier_quote_style, + ..Default::default() } } } @@ -95,4 +146,93 @@ impl Dialect for CustomDialect { fn identifier_quote_style(&self, _: &str) -> Option<char> { self.identifier_quote_style } 
+ + fn supports_nulls_first_in_sort(&self) -> bool { + self.supports_nulls_first_in_sort + } + + fn use_timestamp_for_date64(&self) -> bool { + self.use_timestamp_for_date64 + } + + fn interval_style(&self) -> IntervalStyle { + self.interval_style + } +} + +/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern +/// +/// +/// # Examples +/// +/// Building a custom dialect with all default options set in CustomDialectBuilder::new() +/// but with `use_timestamp_for_date64` overridden to `true` +/// +/// ``` +/// use datafusion_sql::unparser::dialect::CustomDialectBuilder; +/// let dialect = CustomDialectBuilder::new() +/// .with_use_timestamp_for_date64(true) +/// .build(); +/// ``` +pub struct CustomDialectBuilder { + identifier_quote_style: Option, + supports_nulls_first_in_sort: bool, + use_timestamp_for_date64: bool, + interval_style: IntervalStyle, +} + +impl Default for CustomDialectBuilder { + fn default() -> Self { + Self::new() + } +} + +impl CustomDialectBuilder { + pub fn new() -> Self { + Self { + identifier_quote_style: None, + supports_nulls_first_in_sort: true, + use_timestamp_for_date64: false, + interval_style: IntervalStyle::PostgresVerbose, + } + } + + pub fn build(self) -> CustomDialect { + CustomDialect { + identifier_quote_style: self.identifier_quote_style, + supports_nulls_first_in_sort: self.supports_nulls_first_in_sort, + use_timestamp_for_date64: self.use_timestamp_for_date64, + interval_style: self.interval_style, + } + } + + /// Customize the dialect with a specific identifier quote style, e.g. 
'`', '"' + pub fn with_identifier_quote_style(mut self, identifier_quote_style: char) -> Self { + self.identifier_quote_style = Some(identifier_quote_style); + self + } + + /// Customize whether the dialect supports `NULLS FIRST` in `ORDER BY` clauses + pub fn with_supports_nulls_first_in_sort( + mut self, + supports_nulls_first_in_sort: bool, + ) -> Self { + self.supports_nulls_first_in_sort = supports_nulls_first_in_sort; + self + } + + /// Customize whether the dialect uses TIMESTAMP rather than DATETIME when casting Date64 + pub fn with_use_timestamp_for_date64( + mut self, + use_timestamp_for_date64: bool, + ) -> Self { + self.use_timestamp_for_date64 = use_timestamp_for_date64; + self + } + + /// Customize the dialect with a specific interval style listed in `IntervalStyle` + pub fn with_interval_style(mut self, interval_style: IntervalStyle) -> Self { + self.interval_style = interval_style; + self + } } diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index eb149c819c8b..6b7775ee3d4d 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -43,6 +43,7 @@ use datafusion_expr::{ Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, }; +use super::dialect::IntervalStyle; use super::Unparser; /// DataFusion's Exprs can represent either an `Expr` or an `OrderByExpr` @@ -541,6 +542,14 @@ impl Unparser<'_> { } } + fn ast_type_for_date64_in_cast(&self) -> ast::DataType { + if self.dialect.use_timestamp_for_date64() { + ast::DataType::Timestamp(None, ast::TimezoneInfo::None) + } else { + ast::DataType::Datetime(None) + } + } + fn col_to_sql(&self, col: &Column) -> Result { if let Some(table_ref) = &col.relation { let mut id = table_ref.to_vec(); @@ -1003,7 +1012,7 @@ impl Unparser<'_> { expr: Box::new(ast::Expr::Value(ast::Value::SingleQuotedString( datetime.to_string(), ))), - data_type: ast::DataType::Datetime(None), + data_type: self.ast_type_for_date64_in_cast(), format: None, 
}) } @@ -1055,22 +1064,7 @@ impl Unparser<'_> { ScalarValue::IntervalYearMonth(Some(_)) | ScalarValue::IntervalDayTime(Some(_)) | ScalarValue::IntervalMonthDayNano(Some(_)) => { - let wrap_array = v.to_array()?; - let Some(result) = array_value_to_string(&wrap_array, 0).ok() else { - return internal_err!( - "Unable to convert interval scalar value to string" - ); - }; - let interval = Interval { - value: Box::new(ast::Expr::Value(SingleQuotedString( - result.to_uppercase(), - ))), - leading_field: None, - leading_precision: None, - last_field: None, - fractional_seconds_precision: None, - }; - Ok(ast::Expr::Interval(interval)) + self.interval_scalar_to_sql(v) } ScalarValue::IntervalYearMonth(None) => { Ok(ast::Expr::Value(ast::Value::Null)) @@ -1108,6 +1102,123 @@ impl Unparser<'_> { } } + fn interval_scalar_to_sql(&self, v: &ScalarValue) -> Result { + match self.dialect.interval_style() { + IntervalStyle::PostgresVerbose => { + let wrap_array = v.to_array()?; + let Some(result) = array_value_to_string(&wrap_array, 0).ok() else { + return internal_err!( + "Unable to convert interval scalar value to string" + ); + }; + let interval = Interval { + value: Box::new(ast::Expr::Value(SingleQuotedString( + result.to_uppercase(), + ))), + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) + } + // If the interval standard is SQLStandard, implement a simple unparse logic + IntervalStyle::SQLStandard => match v { + ScalarValue::IntervalYearMonth(v) => { + let Some(v) = v else { + return Ok(ast::Expr::Value(ast::Value::Null)); + }; + let interval = Interval { + value: Box::new(ast::Expr::Value( + ast::Value::SingleQuotedString(v.to_string()), + )), + leading_field: Some(ast::DateTimeField::Month), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) + } + ScalarValue::IntervalDayTime(v) => { + let Some(v) = v 
else { + return Ok(ast::Expr::Value(ast::Value::Null)); + }; + let days = v.days; + let secs = v.milliseconds / 1_000; + let mins = secs / 60; + let hours = mins / 60; + + let secs = secs - (mins * 60); + let mins = mins - (hours * 60); + + let millis = v.milliseconds % 1_000; + let interval = Interval { + value: Box::new(ast::Expr::Value( + ast::Value::SingleQuotedString(format!( + "{days} {hours}:{mins}:{secs}.{millis:03}" + )), + )), + leading_field: Some(ast::DateTimeField::Day), + leading_precision: None, + last_field: Some(ast::DateTimeField::Second), + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) + } + ScalarValue::IntervalMonthDayNano(v) => { + let Some(v) = v else { + return Ok(ast::Expr::Value(ast::Value::Null)); + }; + + if v.months >= 0 && v.days == 0 && v.nanoseconds == 0 { + let interval = Interval { + value: Box::new(ast::Expr::Value( + ast::Value::SingleQuotedString(v.months.to_string()), + )), + leading_field: Some(ast::DateTimeField::Month), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) + } else if v.months == 0 + && v.days >= 0 + && v.nanoseconds % 1_000_000 == 0 + { + let days = v.days; + let secs = v.nanoseconds / 1_000_000_000; + let mins = secs / 60; + let hours = mins / 60; + + let secs = secs - (mins * 60); + let mins = mins - (hours * 60); + + let millis = (v.nanoseconds % 1_000_000_000) / 1_000_000; + + let interval = Interval { + value: Box::new(ast::Expr::Value( + ast::Value::SingleQuotedString(format!( + "{days} {hours}:{mins}:{secs}.{millis:03}" + )), + )), + leading_field: Some(ast::DateTimeField::Day), + leading_precision: None, + last_field: Some(ast::DateTimeField::Second), + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) + } else { + not_impl_err!("Unsupported IntervalMonthDayNano scalar with both Month and DayTime for IntervalStyle::SQLStandard") + } + } + _ => 
Ok(ast::Expr::Value(ast::Value::Null)), + }, + IntervalStyle::MySQL => { + not_impl_err!("Unsupported interval scalar for IntervalStyle::MySQL") + } + } + } + fn arrow_dtype_to_ast_dtype(&self, data_type: &DataType) -> Result { match data_type { DataType::Null => { @@ -1136,7 +1247,7 @@ impl Unparser<'_> { Ok(ast::DataType::Timestamp(None, tz_info)) } DataType::Date32 => Ok(ast::DataType::Date), - DataType::Date64 => Ok(ast::DataType::Datetime(None)), + DataType::Date64 => Ok(self.ast_type_for_date64_in_cast()), DataType::Time32(_) => { not_impl_err!("Unsupported DataType: conversion: {data_type:?}") } @@ -1232,7 +1343,7 @@ mod tests { use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::expr_fn::sum; - use crate::unparser::dialect::CustomDialect; + use crate::unparser::dialect::{CustomDialect, CustomDialectBuilder}; use super::*; @@ -1595,46 +1706,7 @@ mod tests { ), (col("need-quoted").eq(lit(1)), r#"("need-quoted" = 1)"#), (col("need quoted").eq(lit(1)), r#"("need quoted" = 1)"#), - ( - interval_month_day_nano_lit( - "1 YEAR 1 MONTH 1 DAY 3 HOUR 10 MINUTE 20 SECOND", - ), - r#"INTERVAL '0 YEARS 13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#, - ), - ( - interval_month_day_nano_lit("1.5 MONTH"), - r#"INTERVAL '0 YEARS 1 MONS 15 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, - ), - ( - interval_month_day_nano_lit("-3 MONTH"), - r#"INTERVAL '0 YEARS -3 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, - ), - ( - interval_month_day_nano_lit("1 MONTH") - .add(interval_month_day_nano_lit("1 DAY")), - r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' + INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#, - ), - ( - interval_month_day_nano_lit("1 MONTH") - .sub(interval_month_day_nano_lit("1 DAY")), - r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' - INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#, - ), - ( - interval_datetime_lit("10 DAY 1 HOUR 10 MINUTE 20 
SECOND"), - r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#, - ), - ( - interval_datetime_lit("10 DAY 1.5 HOUR 10 MINUTE 20 SECOND"), - r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#, - ), - ( - interval_year_month_lit("1 YEAR 1 MONTH"), - r#"INTERVAL '1 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#, - ), - ( - interval_year_month_lit("1.5 YEAR 1 MONTH"), - r#"INTERVAL '1 YEARS 7 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#, - ), + // See test_interval_scalar_to_expr for interval literals ( (col("a") + col("b")).gt(Expr::Literal(ScalarValue::Decimal128( Some(100123), @@ -1690,8 +1762,10 @@ mod tests { } #[test] - fn custom_dialect() -> Result<()> { - let dialect = CustomDialect::new(Some('\'')); + fn custom_dialect_with_identifier_quote_style() -> Result<()> { + let dialect = CustomDialectBuilder::new() + .with_identifier_quote_style('\'') + .build(); let unparser = Unparser::new(&dialect); let expr = col("a").gt(lit(4)); @@ -1706,8 +1780,8 @@ mod tests { } #[test] - fn custom_dialect_none() -> Result<()> { - let dialect = CustomDialect::new(None); + fn custom_dialect_without_identifier_quote_style() -> Result<()> { + let dialect = CustomDialect::default(); let unparser = Unparser::new(&dialect); let expr = col("a").gt(lit(4)); @@ -1720,4 +1794,143 @@ mod tests { Ok(()) } + + #[test] + fn custom_dialect_use_timestamp_for_date64() -> Result<()> { + for (use_timestamp_for_date64, identifier) in + [(false, "DATETIME"), (true, "TIMESTAMP")] + { + let dialect = CustomDialectBuilder::new() + .with_use_timestamp_for_date64(use_timestamp_for_date64) + .build(); + let unparser = Unparser::new(&dialect); + + let expr = Expr::Cast(Cast { + expr: Box::new(col("a")), + data_type: DataType::Date64, + }); + let ast = unparser.expr_to_sql(&expr)?; + + let actual = format!("{}", ast); + + let expected = format!(r#"CAST(a AS {identifier})"#); + assert_eq!(actual, expected); + } + Ok(()) + } + + #[test] + fn 
custom_dialect_support_nulls_first_in_sort() -> Result<()> { + let tests: Vec<(Expr, &str, bool)> = vec![ + (col("a").sort(true, true), r#"a ASC NULLS FIRST"#, true), + (col("a").sort(true, true), r#"a ASC"#, false), + ]; + + for (expr, expected, supports_nulls_first_in_sort) in tests { + let dialect = CustomDialectBuilder::new() + .with_supports_nulls_first_in_sort(supports_nulls_first_in_sort) + .build(); + let unparser = Unparser::new(&dialect); + let ast = unparser.expr_to_unparsed(&expr)?; + + let actual = format!("{}", ast); + + assert_eq!(actual, expected); + } + + Ok(()) + } + + #[test] + fn test_interval_scalar_to_expr() { + let tests = [ + ( + interval_month_day_nano_lit("1 MONTH"), + IntervalStyle::SQLStandard, + "INTERVAL '1' MONTH", + ), + ( + interval_month_day_nano_lit("1.5 DAY"), + IntervalStyle::SQLStandard, + "INTERVAL '1 12:0:0.000' DAY TO SECOND", + ), + ( + interval_month_day_nano_lit("1.51234 DAY"), + IntervalStyle::SQLStandard, + "INTERVAL '1 12:17:46.176' DAY TO SECOND", + ), + ( + interval_datetime_lit("1.51234 DAY"), + IntervalStyle::SQLStandard, + "INTERVAL '1 12:17:46.176' DAY TO SECOND", + ), + ( + interval_year_month_lit("1 YEAR"), + IntervalStyle::SQLStandard, + "INTERVAL '12' MONTH", + ), + ( + interval_month_day_nano_lit( + "1 YEAR 1 MONTH 1 DAY 3 HOUR 10 MINUTE 20 SECOND", + ), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '0 YEARS 13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#, + ), + ( + interval_month_day_nano_lit("1.5 MONTH"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '0 YEARS 1 MONS 15 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, + ), + ( + interval_month_day_nano_lit("-3 MONTH"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '0 YEARS -3 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, + ), + ( + interval_month_day_nano_lit("1 MONTH") + .add(interval_month_day_nano_lit("1 DAY")), + IntervalStyle::PostgresVerbose, + r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' + INTERVAL '0 YEARS 0 MONS 1 DAYS 
0 HOURS 0 MINS 0.000000000 SECS')"#, + ), + ( + interval_month_day_nano_lit("1 MONTH") + .sub(interval_month_day_nano_lit("1 DAY")), + IntervalStyle::PostgresVerbose, + r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' - INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#, + ), + ( + interval_datetime_lit("10 DAY 1 HOUR 10 MINUTE 20 SECOND"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#, + ), + ( + interval_datetime_lit("10 DAY 1.5 HOUR 10 MINUTE 20 SECOND"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#, + ), + ( + interval_year_month_lit("1 YEAR 1 MONTH"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '1 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#, + ), + ( + interval_year_month_lit("1.5 YEAR 1 MONTH"), + IntervalStyle::PostgresVerbose, + r#"INTERVAL '1 YEARS 7 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#, + ), + ]; + + for (value, style, expected) in tests { + let dialect = CustomDialectBuilder::new() + .with_interval_style(style) + .build(); + let unparser = Unparser::new(&dialect); + + let ast = unparser.expr_to_sql(&value).expect("to be unparsed"); + + let actual = format!("{ast}"); + + assert_eq!(actual, expected); + } + } }