From 86871a8480e7fcfcc18dea90e842e5e325896ae5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Dec 2023 13:03:57 -0800 Subject: [PATCH 1/5] fix: RANGE frame can be regularized to ROWS frame only if empty ORDER BY clause --- datafusion/expr/src/window_frame.rs | 16 ++++--- datafusion/sqllogictest/test_files/window.slt | 44 ++++++++++++++++--- 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index 5f161b85dd9a..978f7b508d14 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -148,18 +148,20 @@ impl WindowFrame { pub fn regularize(mut frame: WindowFrame, order_bys: usize) -> Result { if frame.units == WindowFrameUnits::Range && order_bys != 1 { // Normally, RANGE frames require an ORDER BY clause with exactly one - // column. However, an ORDER BY clause may be absent in two edge cases. + // column. However, an ORDER BY clause may be absent in two edge cases: + // 1. start bound is UNBOUNDED or CURRENT ROW + // 2. end bound is CURRENT ROW or UNBOUNDED. + // In these cases, we regularize the RANGE frame to be equivalent to a ROWS + // frame with the UNBOUNDED bounds. if (frame.start_bound.is_unbounded() || frame.start_bound == WindowFrameBound::CurrentRow) && (frame.end_bound == WindowFrameBound::CurrentRow || frame.end_bound.is_unbounded()) + && order_bys == 0 { - if order_bys == 0 { - frame.units = WindowFrameUnits::Rows; - frame.start_bound = - WindowFrameBound::Preceding(ScalarValue::UInt64(None)); - frame.end_bound = WindowFrameBound::Following(ScalarValue::UInt64(None)); - } + frame.units = WindowFrameUnits::Rows; + frame.start_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(None)); + frame.end_bound = WindowFrameBound::Following(ScalarValue::UInt64(None)); } else { plan_err!("RANGE requires exactly one ORDER BY column")? } diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index bb6ca119480d..ee5447618506 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1080,7 +1080,7 @@ SELECT 794 95 95 #fn test_window_range_equivalent_frames -query IIIIIII +query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column SELECT c9, COUNT(*) OVER(ORDER BY c9, c1 RANGE BETWEEN CURRENT ROW AND CURRENT ROW) AS cnt1, @@ -1092,12 +1092,22 @@ SELECT FROM aggregate_test_100 ORDER BY c9 LIMIT 5 + +query IIII +SELECT + c9, + COUNT(*) OVER(RANGE BETWEEN CURRENT ROW AND CURRENT ROW) AS cnt4, + COUNT(*) OVER(RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cnt5, + COUNT(*) OVER(RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS cnt6 + FROM aggregate_test_100 + ORDER BY c9 + LIMIT 5 ---- -28774375 1 1 1 100 100 100 -63044568 1 2 1 100 100 100 -141047417 1 3 1 100 100 100 -141680161 1 4 1 100 100 100 -145294611 1 5 1 100 100 100 +28774375 100 100 100 +63044568 100 100 100 +141047417 100 100 100 +141680161 100 100 100 +145294611 100 100 100 #fn test_window_cume_dist query IRR @@ -3727,3 +3737,25 @@ FROM score_board s statement ok DROP TABLE score_board; + +# RANGE frame with/without (multiple) ORDER BY +query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column +select a, + rank() over (partition by a order by a, a + 1 RANGE UNBOUNDED PRECEDING) rnk + from (select 1 a union all select 2 a) q + +query II +select a, + rank() over (partition by a order by a RANGE UNBOUNDED PRECEDING) rnk + from (select 1 a union all select 2 a) q +---- +1 1 +2 1 + +query II +select a, + rank() over (partition by a RANGE UNBOUNDED PRECEDING) rnk + from (select 1 a union all select 2 a) q +---- +1 1 +2 1 From ab0b4866989af479101f834f00b2ee526fb644a5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Dec 2023 14:52:33 -0800 Subject: [PATCH 2/5] Fix flaky test --- datafusion/sqllogictest/test_files/window.slt | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index ee5447618506..5db2836d84ad 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -3742,20 +3742,18 @@ DROP TABLE score_board; query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column select a, rank() over (partition by a order by a, a + 1 RANGE UNBOUNDED PRECEDING) rnk - from (select 1 a union all select 2 a) q + from (select 1 a) q query II select a, rank() over (partition by a order by a RANGE UNBOUNDED PRECEDING) rnk - from (select 1 a union all select 2 a) q + from (select 1 a) q ---- 1 1 -2 1 query II select a, rank() over (partition by a RANGE UNBOUNDED PRECEDING) rnk - from (select 1 a union all select 2 a) q + from (select 1 a) q ---- 1 1 -2 1 From f12b9e615a7ded5f440420b6a6fe7eb90dc55b30 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Dec 2023 16:12:11 -0800 Subject: [PATCH 3/5] Update test comment --- datafusion/sqllogictest/test_files/window.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 5db2836d84ad..7b1f4aecb9ab 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -3738,7 +3738,7 @@ FROM score_board s statement ok DROP TABLE score_board; -# RANGE frame with/without (multiple) ORDER BY +# RANGE frame can be regularized to ROWS frame only if empty ORDER BY clause query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column select a, rank() over (partition by a order by a, a + 1 RANGE UNBOUNDED PRECEDING) rnk From 6926b2fb0efbd85d06d194e210c87471fa9c907d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Dec 2023 16:12:56 -0800 Subject: [PATCH 4/5] Add code comment --- datafusion/expr/src/window_frame.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index 978f7b508d14..575d4d766b0a 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -153,6 +153,7 @@ pub fn regularize(mut frame: WindowFrame, order_bys: usize) -> Result Date: Sun, 3 Dec 2023 21:43:18 -0800 Subject: [PATCH 5/5] Update --- datafusion/expr/src/window_frame.rs | 17 +++-- datafusion/sqllogictest/test_files/window.slt | 74 +++++++++++++------ 2 files changed, 63 insertions(+), 28 deletions(-) diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index 575d4d766b0a..2a64f21b856b 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -148,7 +148,8 @@ impl WindowFrame { pub fn regularize(mut frame: WindowFrame, order_bys: usize) -> Result { if frame.units == WindowFrameUnits::Range && order_bys != 1 { // Normally, RANGE frames require an ORDER BY clause with exactly one - // column. However, an ORDER BY clause may be absent in two edge cases: + // column. However, an ORDER BY clause may be absent or present but with + // more than one column in two edge cases: // 1. start bound is UNBOUNDED or CURRENT ROW // 2. end bound is CURRENT ROW or UNBOUNDED. // In these cases, we regularize the RANGE frame to be equivalent to a ROWS @@ -158,11 +159,17 @@ pub fn regularize(mut frame: WindowFrame, order_bys: usize) -> Result